Fix umami drifter count queries

All possible groups and sim types have to be filled in before doing the
transpose (crosstab); otherwise the pivoted table does not come out right.
This commit is contained in:
2025-11-18 18:15:31 +01:00
parent 74600abddc
commit 9720de1a67
4 changed files with 102 additions and 17 deletions
@@ -0,0 +1,56 @@
-- NOTE(simkir): Materialized view called by the crosstab to pivot sim types to cols
--
-- Produces one row per (group, sim_type) combination with the number of
-- 'mapster-submit-drifters' events seen in the last 7 days. Combinations
-- without any events are kept with a count of 0, so the pivot gets a full grid.
--
-- Columns: "group" (the '@...' tail of session.distinct_id), sim_type, count.
--
-- NOTE(review): A materialized view stores its result, so `current_timestamp`
-- is evaluated when the view is created or refreshed. Run
-- `refresh materialized view weekly_sim_submit_count_v2` to move the 7 day
-- window forward.
create materialized view weekly_sim_submit_count_v2 as
with
    -- NOTE(simkir): Select all groups and sim kinds
    group_and_sims as (
        select
            session_groups."group",
            sim_kinds.sim_type
        from
            -- Every distinct '@...' suffix found in session.distinct_id.
            -- Derived tables are aliased because PostgreSQL versions before 16
            -- reject a subquery in FROM without an alias.
            (
                select substring(distinct_id similar '%#"@%#"' escape '#') as "group"
                from session
                where distinct_id is not null and distinct_id like '%@%'
                group by substring(distinct_id similar '%#"@%#"' escape '#')
            ) as session_groups
            cross join
            -- Every distinct value stored under the 'kind' key in event_data.
            (
                select distinct string_value as sim_type
                from event_data
                where event_data.data_key = 'kind'
            ) as sim_kinds
    ),
    -- One row per submit event that carries a 'kind' value.
    events as (
        select
            substring(session.distinct_id similar '%#"@%#"' escape '#') as "group",
            event_data.string_value as sim_type,
            event_data.website_event_id as event_id,
            event_data.created_at
        from
            website_event
            join session on session.session_id = website_event.session_id
            -- Plain join: the data_key filter below drops events without
            -- event_data anyway, so an outer join here would have no effect.
            join event_data on event_data.website_event_id = website_event.event_id
        where
            website_event.website_id = '16e7d807-4db5-45fd-92a9-27393445a153'
            and website_event.event_type = 2
            and website_event.event_name = 'mapster-submit-drifters'
            and event_data.data_key = 'kind'
    )
select
    group_and_sims."group",
    group_and_sims.sim_type,
    -- Counts only matched events, so combinations without events yield 0.
    -- The column name `count` is read by name by the pivot query.
    count(events.event_id) as count
from
    group_and_sims
    -- NOTE(simkir): Allow missing events
    left outer join events on
        -- NOTE(simkir): Join on the unique combinations of group name and sim
        -- kind which we've already fetched in `group_and_sims`.
        events."group" = group_and_sims."group"
        and events.sim_type = group_and_sims.sim_type
        -- NOTE(simkir): Filter here in the join to, again, allow missing rows
        and events.created_at between current_timestamp - '7 days'::interval and current_timestamp
group by
    group_and_sims."group",
    group_and_sims.sim_type
+18
View File
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Feeds a local SQL file to psql inside the umami database pod.
#
# Usage: <script> <file>.sql
#
# The file is sent on stdin to `psql app`, which is run through `kubectl exec`
# in the `postgres` container of svc/prod-umami-db-rw.
# Exits with status 1 when the argument count is wrong or the file is missing.

if [ $# -ne 1 ]
then
    # Diagnostics go to stderr so they never mix with psql output.
    echo "Usage: $0 <file>.sql" >&2
    exit 1
fi

file=$1

# Quoted so that paths containing spaces or glob characters are handled.
if [ ! -e "$file" ]
then
    echo "file $file does not exist" >&2
    exit 1
fi

# Redirect instead of `cat |`: if the file cannot be opened the shell aborts
# the command, rather than running psql against an empty stdin.
kubectl exec -i svc/prod-umami-db-rw -c postgres -- psql app < "$file"
@@ -1,24 +1,35 @@
-- Pivots the weekly sim counts so that each sim kind becomes its own column.
-- This is the query that Grafana calls. If a new sim kind is introduced, it
-- must be added to the column list here and also in Grafana.
--
-- group_name | transport | lice | virus | watercontact | sedimentation
-- --------------+-----------+------+-------+--------------+---------------
-- @mowi.com | 1 | | | |
-- @oceanbox.io | 1 | 1 | 5 | 6 |
-- @scaleaq.com | 7 | | | |
-- (3 rows)
-- group_name | lice | sedimentation | transport | virus | watercontact
-- ---------------------+------+---------------+-----------+-------+--------------
-- @aqua-kompetanse.no | 0 | 0 | 0 | 0 | 0
-- @bjoroya.no | 0 | 0 | 0 | 0 | 0
-- @dnv.com | 0 | 0 | 0 | 0 | 0
-- @gmail.com | 0 | 0 | 0 | 0 | 0
-- @leroy.no | 0 | 0 | 0 | 0 | 0
-- @leroyseafood.com | 0 | 0 | 0 | 0 | 0
-- @met.no | 0 | 0 | 0 | 0 | 0
-- @mowi.com | 0 | 0 | 0 | 0 | 1
-- @mowi.no | 0 | 0 | 0 | 0 | 0
-- @oceanbox.io | 2 | 0 | 1 | 5 | 6
-- @oceanpro.no | 0 | 0 | 0 | 0 | 0
-- @ohshavbruk.no | 0 | 0 | 0 | 0 | 0
-- @salmar.no | 0 | 0 | 0 | 0 | 0
-- @scaleaq.com | 7 | 0 | 0 | 0 | 0
-- (14 rows)
SELECT
*
FROM
crosstab_weekly_sim_submit_count(
'SELECT * FROM weekly_sim_submit_count
WHERE sim_type IN (
''transport'',
''lice'',
''virus'',
''watercontact'',
''sedimentation''
)
ORDER BY 1, 2'
-- Pivot (group, sim_type, count) rows into one row per group with one column
-- per sim kind.
--
-- The category list (second argument) is pinned with VALUES rather than read
-- from the view. crosstab requires the number and order of categories to match
-- the output column list below; a category query that follows the data would
-- make this statement fail as soon as the view holds more or fewer than these
-- five sim kinds. With a pinned list, source rows whose sim_type is not listed
-- are ignored, and a listed kind with no source row yields NULL.
--
-- Keep the VALUES list and the column list in the same order.
crosstab(
    'SELECT "group", sim_type, count::text FROM weekly_sim_submit_count_v2 ORDER BY 1, 2',
    'VALUES (''lice''::text), (''sedimentation''), (''transport''), (''virus''), (''watercontact'')'
) AS ct (
    group_name text,
    lice bigint,
    sedimentation bigint,
    transport bigint,
    virus bigint,
    watercontact bigint
)
@@ -10,9 +10,9 @@ WHERE
website_event.website_id = '16e7d807-4db5-45fd-92a9-27393445a153'
AND website_event.event_type = 2
AND website_event.event_name = 'mapster-submit-drifters'
AND website_event.created_at BETWEEN CURRENT_TIMESTAMP - '7 days'::interval AND CURRENT_TIMESTAMP
AND session.distinct_id IS NOT NULL
AND event_data.data_key = 'kind'
AND event_data.created_at BETWEEN CURRENT_TIMESTAMP - '7 days'::interval AND CURRENT_TIMESTAMP
GROUP BY
substring(session.distinct_id SIMILAR '%#"@%#"' ESCAPE '#'),
event_data.string_value;