Skip to content

Commit

Permalink
feat(environments): Make taxonomy reads + writes project–based (#26766)
Browse files Browse the repository at this point in the history
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
Twixes and github-actions[bot] authored Dec 16, 2024
1 parent 372c92c commit 78959b2
Show file tree
Hide file tree
Showing 12 changed files with 225 additions and 30 deletions.
21 changes: 12 additions & 9 deletions posthog/api/property_definition.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import dataclasses
import json
from typing import Any, Optional, cast
from django.db.models.functions import Coalesce

from django.db import connection
from django.db import connection, models
from loginas.utils import is_impersonated_session
from rest_framework import mixins, request, response, serializers, status, viewsets
from posthog.api.utils import action
Expand Down Expand Up @@ -99,7 +100,7 @@ class QueryContext:
The raw query is used to both query and count these results
"""

team_id: int
project_id: int
table: str
property_definition_fields: str
property_definition_table: str
Expand Down Expand Up @@ -232,7 +233,7 @@ def with_search(self, search_query: str, search_kwargs: dict) -> "QueryContext":
return dataclasses.replace(
self,
search_query=search_query,
params={**self.params, "team_id": self.team_id, **search_kwargs},
params={**self.params, "project_id": self.project_id, **search_kwargs},
)

def with_excluded_properties(self, excluded_properties: Optional[str], type: str) -> "QueryContext":
Expand Down Expand Up @@ -264,7 +265,7 @@ def as_sql(self, order_by_verified: bool):
SELECT {self.property_definition_fields}, {self.event_property_field} AS is_seen_on_filtered_events
FROM {self.table}
{self._join_on_event_property()}
WHERE {self.property_definition_table}.team_id = %(team_id)s
WHERE coalesce({self.property_definition_table}.project_id, {self.property_definition_table}.team_id) = %(project_id)s
AND type = %(type)s
AND coalesce(group_type_index, -1) = %(group_type_index)s
{self.excluded_properties_filter}
Expand All @@ -281,7 +282,7 @@ def as_count_sql(self):
SELECT count(*) as full_count
FROM {self.table}
{self._join_on_event_property()}
WHERE {self.property_definition_table}.team_id = %(team_id)s
WHERE coalesce({self.property_definition_table}.project_id, {self.property_definition_table}.team_id) = %(project_id)s
AND type = %(type)s
AND coalesce(group_type_index, -1) = %(group_type_index)s
{self.excluded_properties_filter} {self.name_filter} {self.numerical_filter} {self.search_query} {self.event_property_filter} {self.is_feature_flag_filter}
Expand All @@ -296,7 +297,7 @@ def _join_on_event_property(self):
{self.event_property_join_type} (
SELECT DISTINCT property
FROM posthog_eventproperty
WHERE team_id = %(team_id)s {self.event_name_join_filter}
WHERE coalesce(project_id, team_id) = %(project_id)s {self.event_name_join_filter}
) {self.posthog_eventproperty_table_join_alias}
ON {self.posthog_eventproperty_table_join_alias}.property = name
"""
Expand Down Expand Up @@ -537,7 +538,7 @@ def dangerously_get_queryset(self):

query_context = (
QueryContext(
team_id=self.team_id,
project_id=self.project_id,
table=(
"ee_enterprisepropertydefinition FULL OUTER JOIN posthog_propertydefinition ON posthog_propertydefinition.id=ee_enterprisepropertydefinition.propertydefinition_ptr_id"
if use_enterprise_taxonomy
Expand Down Expand Up @@ -621,8 +622,10 @@ def seen_together(self, request: request.Request, *args: Any, **kwargs: Any) ->
serializer = SeenTogetherQuerySerializer(data=request.GET)
serializer.is_valid(raise_exception=True)

matches = EventProperty.objects.filter(
team_id=self.team_id,
matches = EventProperty.objects.alias(
effective_project_id=Coalesce("project_id", "team_id", output_field=models.BigIntegerField())
).filter(
effective_project_id=self.project_id, # type: ignore
event__in=serializer.validated_data["event_names"],
property=serializer.validated_data["property_name"],
)
Expand Down
2 changes: 1 addition & 1 deletion posthog/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def create_event_definitions_sql(
SELECT {",".join(event_definition_fields)}
FROM posthog_eventdefinition
{enterprise_join}
WHERE team_id = %(project_id)s {conditions}
WHERE coalesce(project_id, team_id) = %(project_id)s {conditions}
ORDER BY {additional_ordering}name ASC
"""

Expand Down
119 changes: 119 additions & 0 deletions posthog/migrations/0532_taxonomy_unique_on_project.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Generated by Django 4.2.15 on 2024-12-09 15:51

from django.db import migrations
from django.db import models
import django.db.models.functions.comparison
import posthog.models.utils
from django.contrib.postgres.operations import AddIndexConcurrently, RemoveIndexConcurrently


class Migration(migrations.Migration):
atomic = False # Added to support concurrent index creation
dependencies = [("posthog", "0531_alter_hogfunction_type")]

operations = [
# First clean up rows that would fail the project-based unique constraints we're adding
migrations.RunSQL(
sql="""
DELETE FROM posthog_propertydefinition
WHERE team_id IN (
SELECT id FROM posthog_team WHERE id != project_id
);""",
reverse_sql=migrations.RunSQL.noop,
elidable=True,
),
migrations.RunSQL(
sql="""
DELETE FROM posthog_eventdefinition
WHERE team_id IN (
SELECT id FROM posthog_team WHERE id != project_id
);""",
reverse_sql=migrations.RunSQL.noop,
elidable=True,
),
migrations.RunSQL(
sql="""
DELETE FROM posthog_eventproperty
WHERE team_id IN (
SELECT id FROM posthog_team WHERE id != project_id
);""",
reverse_sql=migrations.RunSQL.noop,
elidable=True,
),
# Remove misguided `project_id`-only indexes from the previous migration
RemoveIndexConcurrently(
model_name="eventproperty",
name="posthog_eve_proj_id_22de03_idx",
),
RemoveIndexConcurrently(
model_name="eventproperty",
name="posthog_eve_proj_id_26dbfb_idx",
),
RemoveIndexConcurrently(
model_name="propertydefinition",
name="index_property_def_query_proj",
),
RemoveIndexConcurrently(
model_name="propertydefinition",
name="posthog_pro_project_3583d2_idx",
),
# Add new useful indexes using `coalesce(project_id, team_id)`
AddIndexConcurrently(
model_name="eventproperty",
index=models.Index(
django.db.models.functions.comparison.Coalesce(models.F("project_id"), models.F("team_id")),
models.F("event"),
name="posthog_eve_proj_id_22de03_idx",
),
),
AddIndexConcurrently(
model_name="eventproperty",
index=models.Index(
django.db.models.functions.comparison.Coalesce(models.F("project_id"), models.F("team_id")),
models.F("property"),
name="posthog_eve_proj_id_26dbfb_idx",
),
),
AddIndexConcurrently(
model_name="propertydefinition",
index=models.Index(
django.db.models.functions.comparison.Coalesce(models.F("project_id"), models.F("team_id")),
models.F("type"),
django.db.models.functions.comparison.Coalesce(models.F("group_type_index"), -1),
models.OrderBy(models.F("query_usage_30_day"), descending=True, nulls_last=True),
models.OrderBy(models.F("name")),
name="index_property_def_query_proj",
),
),
AddIndexConcurrently(
model_name="propertydefinition",
index=models.Index(
django.db.models.functions.comparison.Coalesce(models.F("project_id"), models.F("team_id")),
models.F("type"),
models.F("is_numerical"),
name="posthog_pro_project_3583d2_idx",
),
),
migrations.AddConstraint(
model_name="eventdefinition",
constraint=posthog.models.utils.UniqueConstraintByExpression(
concurrently=True, expression="(coalesce(project_id, team_id), name)", name="event_definition_proj_uniq"
),
),
migrations.AddConstraint(
model_name="eventproperty",
constraint=posthog.models.utils.UniqueConstraintByExpression(
concurrently=True,
expression="(coalesce(project_id, team_id), event, property)",
name="posthog_event_property_unique_proj_event_property",
),
),
migrations.AddConstraint(
model_name="propertydefinition",
constraint=posthog.models.utils.UniqueConstraintByExpression(
concurrently=True,
expression="(coalesce(project_id, team_id), name, type, coalesce(group_type_index, -1))",
name="posthog_propdef_proj_uniq",
),
),
]
2 changes: 1 addition & 1 deletion posthog/migrations/max_migration.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0531_alter_hogfunction_type
0532_taxonomy_unique_on_project
11 changes: 9 additions & 2 deletions posthog/models/event_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from django.utils import timezone

from posthog.models.team import Team
from posthog.models.utils import UUIDModel
from posthog.models.utils import UUIDModel, UniqueConstraintByExpression


class EventDefinition(UUIDModel):
Expand All @@ -27,7 +27,6 @@ class EventDefinition(UUIDModel):
volume_30_day = models.IntegerField(default=None, null=True)

class Meta:
unique_together = ("team", "name")
indexes = [
# Index on project_id foreign key
models.Index(fields=["project"], name="posthog_eve_proj_id_f93fcbb0"),
Expand All @@ -37,6 +36,14 @@ class Meta:
opclasses=["gin_trgm_ops"],
), # To speed up DB-based fuzzy searching
]
unique_together = ("team", "name")
constraints = [
UniqueConstraintByExpression(
concurrently=True,
name="event_definition_proj_uniq",
expression="(coalesce(project_id, team_id), name)",
)
]

def __str__(self) -> str:
return f"{self.name} / {self.team.name}"
15 changes: 11 additions & 4 deletions posthog/models/event_property.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from django.db import models

from posthog.models.team import Team
from posthog.models.utils import sane_repr
from posthog.models.utils import UniqueConstraintByExpression, sane_repr
from django.db.models.expressions import F
from django.db.models.functions import Coalesce


class EventProperty(models.Model):
Expand All @@ -15,15 +17,20 @@ class Meta:
models.UniqueConstraint(
fields=["team", "event", "property"],
name="posthog_event_property_unique_team_event_property",
)
),
UniqueConstraintByExpression(
concurrently=True,
name="posthog_event_property_unique_proj_event_property",
expression="(coalesce(project_id, team_id), event, property)",
),
]
indexes = [
# Index on project_id foreign key
models.Index(fields=["project"], name="posthog_eve_proj_id_dd2337d2"),
models.Index(fields=["team", "event"]),
models.Index(fields=["project", "event"], name="posthog_eve_proj_id_22de03_idx"),
models.Index(Coalesce(F("project_id"), F("team_id")), F("event"), name="posthog_eve_proj_id_22de03_idx"),
models.Index(fields=["team", "property"]),
models.Index(fields=["project", "property"], name="posthog_eve_proj_id_26dbfb_idx"),
models.Index(Coalesce(F("project_id"), F("team_id")), F("property"), name="posthog_eve_proj_id_26dbfb_idx"),
]

__repr__ = sane_repr("event", "property", "team_id")
14 changes: 12 additions & 2 deletions posthog/models/property_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class Meta:
name="index_property_def_query",
),
models.Index(
F("project_id"),
Coalesce(F("project_id"), F("team_id")),
F("type"),
Coalesce(F("group_type_index"), -1),
F("query_usage_30_day").desc(nulls_last=True),
Expand All @@ -93,7 +93,12 @@ class Meta:
# creates an index pganalyze identified as missing
# https://app.pganalyze.com/servers/i35ydkosi5cy5n7tly45vkjcqa/checks/index_advisor/missing_index/15282978
models.Index(fields=["team_id", "type", "is_numerical"]),
models.Index(fields=["project_id", "type", "is_numerical"], name="posthog_pro_project_3583d2_idx"),
models.Index(
Coalesce(F("project_id"), F("team_id")),
F("type"),
F("is_numerical"),
name="posthog_pro_project_3583d2_idx",
),
GinIndex(
name="index_property_definition_name",
fields=["name"],
Expand All @@ -113,6 +118,11 @@ class Meta:
name="posthog_propertydefinition_uniq",
expression="(team_id, name, type, coalesce(group_type_index, -1))",
),
UniqueConstraintByExpression(
concurrently=True,
name="posthog_propdef_proj_uniq",
expression="(coalesce(project_id, team_id), name, type, coalesce(group_type_index, -1))",
),
]

def __str__(self) -> str:
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions rust/property-defs-rs/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ impl Event {
let updates = self.into_updates_inner();
if updates.len() > skip_threshold {
warn!(
"Event {} for team {} has more than 10,000 properties, skipping",
event, team_id
"Event {} for team {} has more than {} properties, skipping",
event, team_id, skip_threshold
);
metrics::counter!(EVENTS_SKIPPED, &[("reason", "too_many_properties")]).increment(1);
return vec![];
Expand Down Expand Up @@ -427,8 +427,8 @@ impl EventDefinition {
sqlx::query!(
r#"
INSERT INTO posthog_eventdefinition (id, name, volume_30_day, query_usage_30_day, team_id, project_id, last_seen_at, created_at)
VALUES ($1, $2, NULL, NULL, $3, $4, $5, NOW()) ON CONFLICT
ON CONSTRAINT posthog_eventdefinition_team_id_name_80fa0b87_uniq
VALUES ($1, $2, NULL, NULL, $3, $4, $5, NOW())
ON CONFLICT (coalesce(project_id, team_id), name)
DO UPDATE SET last_seen_at = $5
"#,
Uuid::now_v7(),
Expand Down Expand Up @@ -472,7 +472,7 @@ impl PropertyDefinition {
r#"
INSERT INTO posthog_propertydefinition (id, name, type, group_type_index, is_numerical, volume_30_day, query_usage_30_day, team_id, project_id, property_type)
VALUES ($1, $2, $3, $4, $5, NULL, NULL, $6, $7, $8)
ON CONFLICT (team_id, name, type, coalesce(group_type_index, -1))
ON CONFLICT (coalesce(project_id, team_id), name, type, coalesce(group_type_index, -1))
DO UPDATE SET property_type=EXCLUDED.property_type WHERE posthog_propertydefinition.property_type IS NULL
"#,
Uuid::now_v7(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ CREATE TABLE IF NOT EXISTS posthog_eventdefinition (
CONSTRAINT posthog_eventdefinition_team_id_name_80fa0b87_uniq UNIQUE (team_id, name)
);

CREATE UNIQUE INDEX event_definition_proj_uniq ON posthog_eventdefinition (coalesce(project_id, team_id), name);

CREATE TABLE IF NOT EXISTS posthog_propertydefinition (
id UUID PRIMARY KEY,
Expand All @@ -31,6 +32,7 @@ CREATE TABLE IF NOT EXISTS posthog_propertydefinition (
);

CREATE UNIQUE INDEX posthog_propertydefinition_uniq ON posthog_propertydefinition (team_id, name, type, coalesce(group_type_index, -1));
CREATE UNIQUE INDEX posthog_propdef_proj_uniq ON posthog_propertydefinition (coalesce(project_id, team_id), name, type, coalesce(group_type_index, -1));


CREATE TABLE IF NOT EXISTS posthog_eventproperty (
Expand All @@ -42,3 +44,4 @@ CREATE TABLE IF NOT EXISTS posthog_eventproperty (
);

CREATE UNIQUE INDEX posthog_event_property_unique_team_event_property ON posthog_eventproperty (team_id, event, property);
CREATE UNIQUE INDEX posthog_event_property_unique_proj_event_property ON posthog_eventproperty (coalesce(project_id, team_id), event, property);
Loading

0 comments on commit 78959b2

Please sign in to comment.