diff --git a/posthog/api/property_definition.py b/posthog/api/property_definition.py index 84b7a03f030bd..3f4c518005a05 100644 --- a/posthog/api/property_definition.py +++ b/posthog/api/property_definition.py @@ -1,8 +1,9 @@ import dataclasses import json from typing import Any, Optional, cast +from django.db.models.functions import Coalesce -from django.db import connection +from django.db import connection, models from loginas.utils import is_impersonated_session from rest_framework import mixins, request, response, serializers, status, viewsets from posthog.api.utils import action @@ -99,7 +100,7 @@ class QueryContext: The raw query is used to both query and count these results """ - team_id: int + project_id: int table: str property_definition_fields: str property_definition_table: str @@ -232,7 +233,7 @@ def with_search(self, search_query: str, search_kwargs: dict) -> "QueryContext": return dataclasses.replace( self, search_query=search_query, - params={**self.params, "team_id": self.team_id, **search_kwargs}, + params={**self.params, "project_id": self.project_id, **search_kwargs}, ) def with_excluded_properties(self, excluded_properties: Optional[str], type: str) -> "QueryContext": @@ -264,7 +265,7 @@ def as_sql(self, order_by_verified: bool): SELECT {self.property_definition_fields}, {self.event_property_field} AS is_seen_on_filtered_events FROM {self.table} {self._join_on_event_property()} - WHERE {self.property_definition_table}.team_id = %(team_id)s + WHERE coalesce({self.property_definition_table}.project_id, {self.property_definition_table}.team_id) = %(project_id)s AND type = %(type)s AND coalesce(group_type_index, -1) = %(group_type_index)s {self.excluded_properties_filter} @@ -281,7 +282,7 @@ def as_count_sql(self): SELECT count(*) as full_count FROM {self.table} {self._join_on_event_property()} - WHERE {self.property_definition_table}.team_id = %(team_id)s + WHERE coalesce({self.property_definition_table}.project_id, {self.property_definition_table}.team_id) = %(project_id)s AND type = %(type)s AND coalesce(group_type_index, -1) = %(group_type_index)s {self.excluded_properties_filter} {self.name_filter} {self.numerical_filter} {self.search_query} {self.event_property_filter} {self.is_feature_flag_filter} @@ -296,7 +297,7 @@ def _join_on_event_property(self): {self.event_property_join_type} ( SELECT DISTINCT property FROM posthog_eventproperty - WHERE team_id = %(team_id)s {self.event_name_join_filter} + WHERE coalesce(project_id, team_id) = %(project_id)s {self.event_name_join_filter} ) {self.posthog_eventproperty_table_join_alias} ON {self.posthog_eventproperty_table_join_alias}.property = name """ @@ -537,7 +538,7 @@ def dangerously_get_queryset(self): query_context = ( QueryContext( - team_id=self.team_id, + project_id=self.project_id, table=( "ee_enterprisepropertydefinition FULL OUTER JOIN posthog_propertydefinition ON posthog_propertydefinition.id=ee_enterprisepropertydefinition.propertydefinition_ptr_id" if use_enterprise_taxonomy @@ -621,8 +622,10 @@ def seen_together(self, request: request.Request, *args: Any, **kwargs: Any) -> serializer = SeenTogetherQuerySerializer(data=request.GET) serializer.is_valid(raise_exception=True) - matches = EventProperty.objects.filter( - team_id=self.team_id, + matches = EventProperty.objects.alias( + effective_project_id=Coalesce("project_id", "team_id", output_field=models.BigIntegerField()) + ).filter( + effective_project_id=self.project_id, # type: ignore event__in=serializer.validated_data["event_names"], property=serializer.validated_data["property_name"], ) diff --git a/posthog/api/utils.py b/posthog/api/utils.py index 79c4c1028d1d2..52e4cf925cc48 100644 --- a/posthog/api/utils.py +++ b/posthog/api/utils.py @@ -313,7 +313,7 @@ def create_event_definitions_sql( SELECT {",".join(event_definition_fields)} FROM posthog_eventdefinition {enterprise_join} - WHERE team_id = %(project_id)s {conditions} + WHERE coalesce(project_id, team_id) = %(project_id)s {conditions} ORDER BY {additional_ordering}name ASC """ diff --git a/posthog/migrations/0532_taxonomy_unique_on_project.py b/posthog/migrations/0532_taxonomy_unique_on_project.py new file mode 100644 index 0000000000000..2ba5b98d2652c --- /dev/null +++ b/posthog/migrations/0532_taxonomy_unique_on_project.py @@ -0,0 +1,119 @@ +# Generated by Django 4.2.15 on 2024-12-09 15:51 + +from django.db import migrations +from django.db import models +import django.db.models.functions.comparison +import posthog.models.utils +from django.contrib.postgres.operations import AddIndexConcurrently, RemoveIndexConcurrently + + +class Migration(migrations.Migration): + atomic = False # Added to support concurrent index creation + dependencies = [("posthog", "0531_alter_hogfunction_type")] + + operations = [ + # First clean up rows that would fail the project-based unique constraints we're adding + migrations.RunSQL( + sql=""" + DELETE FROM posthog_propertydefinition + WHERE team_id IN ( + SELECT id FROM posthog_team WHERE id != project_id + );""", + reverse_sql=migrations.RunSQL.noop, + elidable=True, + ), + migrations.RunSQL( + sql=""" + DELETE FROM posthog_eventdefinition + WHERE team_id IN ( + SELECT id FROM posthog_team WHERE id != project_id + );""", + reverse_sql=migrations.RunSQL.noop, + elidable=True, + ), + migrations.RunSQL( + sql=""" + DELETE FROM posthog_eventproperty + WHERE team_id IN ( + SELECT id FROM posthog_team WHERE id != project_id + );""", + reverse_sql=migrations.RunSQL.noop, + elidable=True, + ), + # Remove misguided `project_id`-only indexes from the previous migration + RemoveIndexConcurrently( + model_name="eventproperty", + name="posthog_eve_proj_id_22de03_idx", + ), + RemoveIndexConcurrently( + model_name="eventproperty", + name="posthog_eve_proj_id_26dbfb_idx", + ), + RemoveIndexConcurrently( + model_name="propertydefinition", + name="index_property_def_query_proj", + ), + RemoveIndexConcurrently( + model_name="propertydefinition", + name="posthog_pro_project_3583d2_idx", + ), + # Add new useful indexes using `coalesce(project_id, team_id)` + AddIndexConcurrently( + model_name="eventproperty", + index=models.Index( + django.db.models.functions.comparison.Coalesce(models.F("project_id"), models.F("team_id")), + models.F("event"), + name="posthog_eve_proj_id_22de03_idx", + ), + ), + AddIndexConcurrently( + model_name="eventproperty", + index=models.Index( + django.db.models.functions.comparison.Coalesce(models.F("project_id"), models.F("team_id")), + models.F("property"), + name="posthog_eve_proj_id_26dbfb_idx", + ), + ), + AddIndexConcurrently( + model_name="propertydefinition", + index=models.Index( + django.db.models.functions.comparison.Coalesce(models.F("project_id"), models.F("team_id")), + models.F("type"), + django.db.models.functions.comparison.Coalesce(models.F("group_type_index"), -1), + models.OrderBy(models.F("query_usage_30_day"), descending=True, nulls_last=True), + models.OrderBy(models.F("name")), + name="index_property_def_query_proj", + ), + ), + AddIndexConcurrently( + model_name="propertydefinition", + index=models.Index( + django.db.models.functions.comparison.Coalesce(models.F("project_id"), models.F("team_id")), + models.F("type"), + models.F("is_numerical"), + name="posthog_pro_project_3583d2_idx", + ), + ), + migrations.AddConstraint( + model_name="eventdefinition", + constraint=posthog.models.utils.UniqueConstraintByExpression( + concurrently=True, expression="(coalesce(project_id, team_id), name)", name="event_definition_proj_uniq" + ), + ), + migrations.AddConstraint( + model_name="eventproperty", + constraint=posthog.models.utils.UniqueConstraintByExpression( + concurrently=True, + expression="(coalesce(project_id, team_id), event, property)", + name="posthog_event_property_unique_proj_event_property", + ), + ), + migrations.AddConstraint( + model_name="propertydefinition", + constraint=posthog.models.utils.UniqueConstraintByExpression( + concurrently=True, + expression="(coalesce(project_id, team_id), name, type, coalesce(group_type_index, -1))", + name="posthog_propdef_proj_uniq", + ), + ), + ] diff --git a/posthog/migrations/max_migration.txt b/posthog/migrations/max_migration.txt index 28e2442e211b6..01fc03d62a8a0 100644 --- a/posthog/migrations/max_migration.txt +++ b/posthog/migrations/max_migration.txt @@ -1 +1 @@ -0531_alter_hogfunction_type +0532_taxonomy_unique_on_project diff --git a/posthog/models/event_definition.py b/posthog/models/event_definition.py index 47a14035652ed..9d1d574631858 100644 --- a/posthog/models/event_definition.py +++ b/posthog/models/event_definition.py @@ -3,7 +3,7 @@ from django.utils import timezone from posthog.models.team import Team -from posthog.models.utils import UUIDModel +from posthog.models.utils import UUIDModel, UniqueConstraintByExpression class EventDefinition(UUIDModel): @@ -27,7 +27,6 @@ class EventDefinition(UUIDModel): volume_30_day = models.IntegerField(default=None, null=True) class Meta: - unique_together = ("team", "name") indexes = [ # Index on project_id foreign key models.Index(fields=["project"], name="posthog_eve_proj_id_f93fcbb0"), @@ -37,6 +36,14 @@ class Meta: opclasses=["gin_trgm_ops"], ), # To speed up DB-based fuzzy searching ] + unique_together = ("team", "name") + constraints = [ + UniqueConstraintByExpression( + concurrently=True, + name="event_definition_proj_uniq", + expression="(coalesce(project_id, team_id), name)", + ) + ] def __str__(self) -> str: return f"{self.name} / {self.team.name}" diff --git a/posthog/models/event_property.py b/posthog/models/event_property.py index 61f5d27baace6..4dc05a98e5701 100644 --- a/posthog/models/event_property.py +++ b/posthog/models/event_property.py @@ -1,7 +1,9 @@ from django.db import models from posthog.models.team import Team -from posthog.models.utils import sane_repr +from posthog.models.utils import UniqueConstraintByExpression, sane_repr +from django.db.models.expressions import F +from django.db.models.functions import Coalesce class EventProperty(models.Model): @@ -15,15 +17,20 @@ class Meta: models.UniqueConstraint( fields=["team", "event", "property"], name="posthog_event_property_unique_team_event_property", - ) + ), + UniqueConstraintByExpression( + concurrently=True, + name="posthog_event_property_unique_proj_event_property", + expression="(coalesce(project_id, team_id), event, property)", + ), ] indexes = [ # Index on project_id foreign key models.Index(fields=["project"], name="posthog_eve_proj_id_dd2337d2"), models.Index(fields=["team", "event"]), - models.Index(fields=["project", "event"], name="posthog_eve_proj_id_22de03_idx"), + models.Index(Coalesce(F("project_id"), F("team_id")), F("event"), name="posthog_eve_proj_id_22de03_idx"), models.Index(fields=["team", "property"]), - models.Index(fields=["project", "property"], name="posthog_eve_proj_id_26dbfb_idx"), + models.Index(Coalesce(F("project_id"), F("team_id")), F("property"), name="posthog_eve_proj_id_26dbfb_idx"), ] __repr__ = sane_repr("event", "property", "team_id") diff --git a/posthog/models/property_definition.py b/posthog/models/property_definition.py index 08e6cd5d1bed1..b4cc20797c89a 100644 --- a/posthog/models/property_definition.py +++ b/posthog/models/property_definition.py @@ -83,7 +83,7 @@ class Meta: name="index_property_def_query", ), models.Index( - F("project_id"), + Coalesce(F("project_id"), F("team_id")), F("type"), Coalesce(F("group_type_index"), -1), F("query_usage_30_day").desc(nulls_last=True), @@ -93,7 +93,12 @@ class Meta: # creates an index pganalyze identified as missing # https://app.pganalyze.com/servers/i35ydkosi5cy5n7tly45vkjcqa/checks/index_advisor/missing_index/15282978 models.Index(fields=["team_id", "type", "is_numerical"]), - models.Index(fields=["project_id", "type", "is_numerical"], name="posthog_pro_project_3583d2_idx"), + models.Index( + Coalesce(F("project_id"), F("team_id")), + F("type"), + F("is_numerical"), + name="posthog_pro_project_3583d2_idx", + ), GinIndex( name="index_property_definition_name", fields=["name"], @@ -113,6 +118,11 @@ class Meta: name="posthog_propertydefinition_uniq", expression="(team_id, name, type, coalesce(group_type_index, -1))", ), + UniqueConstraintByExpression( + concurrently=True, + name="posthog_propdef_proj_uniq", + expression="(coalesce(project_id, team_id), name, type, coalesce(group_type_index, -1))", + ), ] def __str__(self) -> str: diff --git a/rust/property-defs-rs/.sqlx/query-04abdef9c07ae1a30bb6f22abcfb4dcdf2e218e48e0fd8a247e1b7ae0f04aee3.json b/rust/property-defs-rs/.sqlx/query-dc7d1647bdb6fcaff1b402b5ea8a376473bd31ace71b4ab6114a39b5aa141f6f.json similarity index 63% rename from rust/property-defs-rs/.sqlx/query-04abdef9c07ae1a30bb6f22abcfb4dcdf2e218e48e0fd8a247e1b7ae0f04aee3.json rename to rust/property-defs-rs/.sqlx/query-dc7d1647bdb6fcaff1b402b5ea8a376473bd31ace71b4ab6114a39b5aa141f6f.json index 5c8b96e695c28..478e535f3d462 100644 --- a/rust/property-defs-rs/.sqlx/query-04abdef9c07ae1a30bb6f22abcfb4dcdf2e218e48e0fd8a247e1b7ae0f04aee3.json +++ b/rust/property-defs-rs/.sqlx/query-dc7d1647bdb6fcaff1b402b5ea8a376473bd31ace71b4ab6114a39b5aa141f6f.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n INSERT INTO posthog_propertydefinition (id, name, type, group_type_index, is_numerical, volume_30_day, query_usage_30_day, team_id, project_id, property_type)\n VALUES ($1, $2, $3, $4, $5, NULL, NULL, $6, $7, $8)\n ON CONFLICT (team_id, name, type, coalesce(group_type_index, -1))\n DO UPDATE SET property_type=EXCLUDED.property_type WHERE posthog_propertydefinition.property_type IS NULL\n ", + "query": "\n INSERT INTO posthog_propertydefinition (id, name, type, group_type_index, is_numerical, volume_30_day, query_usage_30_day, team_id, project_id, property_type)\n VALUES ($1, $2, $3, $4, $5, NULL, NULL, $6, $7, $8)\n ON CONFLICT (coalesce(project_id, team_id), name, type, coalesce(group_type_index, -1))\n DO UPDATE SET property_type=EXCLUDED.property_type WHERE posthog_propertydefinition.property_type IS NULL\n ", "describe": { "columns": [], "parameters": { @@ -17,5 +17,5 @@ }, "nullable": [] }, - "hash": "04abdef9c07ae1a30bb6f22abcfb4dcdf2e218e48e0fd8a247e1b7ae0f04aee3" + "hash": "dc7d1647bdb6fcaff1b402b5ea8a376473bd31ace71b4ab6114a39b5aa141f6f" } diff --git a/rust/property-defs-rs/.sqlx/query-2b9a8c4b8d323e1673d805125b4073799ecba84594ca04cfb24481cffbf6f6ca.json b/rust/property-defs-rs/.sqlx/query-ed7f5f3607948d5f5d9fca2b513822cc3ed70f0c5c14593f3d32ef6dc2ae82f7.json similarity index 63% rename from rust/property-defs-rs/.sqlx/query-2b9a8c4b8d323e1673d805125b4073799ecba84594ca04cfb24481cffbf6f6ca.json rename to rust/property-defs-rs/.sqlx/query-ed7f5f3607948d5f5d9fca2b513822cc3ed70f0c5c14593f3d32ef6dc2ae82f7.json index 785a13a6d1ce7..4acc68001bcdf 100644 --- a/rust/property-defs-rs/.sqlx/query-2b9a8c4b8d323e1673d805125b4073799ecba84594ca04cfb24481cffbf6f6ca.json +++ b/rust/property-defs-rs/.sqlx/query-ed7f5f3607948d5f5d9fca2b513822cc3ed70f0c5c14593f3d32ef6dc2ae82f7.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n INSERT INTO posthog_eventdefinition (id, name, volume_30_day, query_usage_30_day, team_id, project_id, last_seen_at, created_at)\n VALUES ($1, $2, NULL, NULL, $3, $4, $5, NOW()) ON CONFLICT\n ON CONSTRAINT posthog_eventdefinition_team_id_name_80fa0b87_uniq\n DO UPDATE SET last_seen_at = $5\n ", + "query": "\n INSERT INTO posthog_eventdefinition (id, name, volume_30_day, query_usage_30_day, team_id, project_id, last_seen_at, created_at)\n VALUES ($1, $2, NULL, NULL, $3, $4, $5, NOW())\n ON CONFLICT (coalesce(project_id, team_id), name)\n DO UPDATE SET last_seen_at = $5\n ", "describe": { "columns": [], "parameters": { @@ -14,5 +14,5 @@ }, "nullable": [] }, - "hash": "2b9a8c4b8d323e1673d805125b4073799ecba84594ca04cfb24481cffbf6f6ca" + "hash": "ed7f5f3607948d5f5d9fca2b513822cc3ed70f0c5c14593f3d32ef6dc2ae82f7" } diff --git a/rust/property-defs-rs/src/types.rs b/rust/property-defs-rs/src/types.rs index d437c62849f0e..fc3e30db09ddd 100644 --- a/rust/property-defs-rs/src/types.rs +++ b/rust/property-defs-rs/src/types.rs @@ -175,8 +175,8 @@ impl Event { let updates = self.into_updates_inner(); if updates.len() > skip_threshold { warn!( - "Event {} for team {} has more than 10,000 properties, skipping", - event, team_id + "Event {} for team {} has more than {} properties, skipping", + event, team_id, skip_threshold ); metrics::counter!(EVENTS_SKIPPED, &[("reason", "too_many_properties")]).increment(1); return vec![]; @@ -427,8 +427,8 @@ impl EventDefinition { sqlx::query!( r#" INSERT INTO posthog_eventdefinition (id, name, volume_30_day, query_usage_30_day, team_id, project_id, last_seen_at, created_at) - VALUES ($1, $2, NULL, NULL, $3, $4, $5, NOW()) ON CONFLICT - ON CONSTRAINT posthog_eventdefinition_team_id_name_80fa0b87_uniq + VALUES ($1, $2, NULL, NULL, $3, $4, $5, NOW()) + ON CONFLICT (coalesce(project_id, team_id), name) DO UPDATE SET last_seen_at = $5 "#, Uuid::now_v7(), @@ -472,7 +472,7 @@ impl PropertyDefinition { r#" INSERT INTO posthog_propertydefinition (id, name, type, group_type_index, is_numerical, volume_30_day, query_usage_30_day, team_id, project_id, property_type) VALUES ($1, $2, $3, $4, $5, NULL, NULL, $6, $7, $8) - ON CONFLICT (team_id, name, type, coalesce(group_type_index, -1)) + ON CONFLICT (coalesce(project_id, team_id), name, type, coalesce(group_type_index, -1)) DO UPDATE SET property_type=EXCLUDED.property_type WHERE posthog_propertydefinition.property_type IS NULL "#, Uuid::now_v7(), diff --git a/rust/property-defs-rs/tests/test_migrations/20240830124836_setup_propdefs_tables.sql b/rust/property-defs-rs/tests/test_migrations/20240830124836_setup_propdefs_tables.sql index 69b8e56b9e400..2291e6e0d0ce3 100644 --- a/rust/property-defs-rs/tests/test_migrations/20240830124836_setup_propdefs_tables.sql +++ b/rust/property-defs-rs/tests/test_migrations/20240830124836_setup_propdefs_tables.sql @@ -15,6 +15,7 @@ CREATE TABLE IF NOT EXISTS posthog_eventdefinition ( CONSTRAINT posthog_eventdefinition_team_id_name_80fa0b87_uniq UNIQUE (team_id, name) ); +CREATE UNIQUE INDEX event_definition_proj_uniq ON posthog_eventdefinition (coalesce(project_id, team_id), name); CREATE TABLE IF NOT EXISTS posthog_propertydefinition ( id UUID PRIMARY KEY, @@ -31,6 +32,7 @@ CREATE TABLE IF NOT EXISTS posthog_propertydefinition ( ); CREATE UNIQUE INDEX posthog_propertydefinition_uniq ON posthog_propertydefinition (team_id, name, type, coalesce(group_type_index, -1)); +CREATE UNIQUE INDEX posthog_propdef_proj_uniq ON posthog_propertydefinition (coalesce(project_id, team_id), name, type, coalesce(group_type_index, -1)); CREATE TABLE IF NOT EXISTS posthog_eventproperty ( @@ -42,3 +44,4 @@ CREATE TABLE IF NOT EXISTS posthog_eventproperty ( ); CREATE UNIQUE INDEX posthog_event_property_unique_team_event_property ON posthog_eventproperty (team_id, event, property); +CREATE UNIQUE INDEX posthog_event_property_unique_proj_event_property ON posthog_eventproperty (coalesce(project_id, team_id), event, property); diff --git a/rust/property-defs-rs/tests/updates.rs b/rust/property-defs-rs/tests/updates.rs index 8e78eeed00368..0cb3b86eea160 100644 --- a/rust/property-defs-rs/tests/updates.rs +++ b/rust/property-defs-rs/tests/updates.rs @@ -1,7 +1,9 @@ use chrono::{DateTime, Duration, Utc}; use property_defs_rs::types::{Event, PropertyParentType, PropertyValueType}; use serde_json::json; -use sqlx::PgPool; +use sqlx::postgres::PgArguments; +use sqlx::{Arguments, PgPool}; +use uuid::Uuid; #[sqlx::test(migrations = "./tests/test_migrations")] async fn test_updates(db: PgPool) { @@ -15,7 +17,6 @@ async fn test_updates(db: PgPool) { "some_bool_as_string": "true" } "#; - let event_src = json!({ "team_id": 1, "project_id": 1, @@ -105,6 +106,51 @@ async fn test_updates(db: PgPool) { .unwrap(); } +#[sqlx::test(migrations = "./tests/test_migrations")] +async fn test_update_on_project_id_conflict(db: PgPool) { + let definition_created_at: DateTime = Utc::now() - Duration::days(1); + let mut args = PgArguments::default(); + args.add(Uuid::now_v7()).unwrap(); + args.add("foo").unwrap(); + args.add(1).unwrap(); + args.add(definition_created_at).unwrap(); + sqlx::query_with( + r#" + INSERT INTO posthog_eventdefinition (id, name, volume_30_day, query_usage_30_day, team_id, project_id, last_seen_at, created_at) + VALUES ($1, $2, NULL, NULL, $3, NULL, $4, $4) -- project_id is NULL! This definition is from before environments + "#, args + ).execute(&db).await.unwrap(); + + assert_eventdefinition_exists( + &db, + "foo", + 1, + definition_created_at, + Duration::milliseconds(0), + ) + .await + .unwrap(); + + let before = Utc::now(); + let event_src = json!({ + "team_id": 3, + "project_id": 1, + "event": "foo", + "properties": "{}" + }); + + let event = serde_json::from_value::(event_src.clone()).unwrap(); + for update in event.into_updates(10000) { + update.issue(&db).await.unwrap(); + } + + // The event def we created earlier got updated, even though it has a different `team_id`, + // because `coalesce(project_id, team_id)` matches + assert_eventdefinition_exists(&db, "foo", 1, before, Duration::seconds(1)) + .await + .unwrap(); +} + async fn assert_eventdefinition_exists( db: &PgPool, name: &str,