Skip to content

Commit

Permalink
perf: fix N+1 and some duplicate query issues across CatalogSerializer (
Browse files Browse the repository at this point in the history
#4206)

* perf: fix N+1 and some duplicate query issues across CourseSerializer

* chore: address review comments, run edx_lint to ignore broad-except
  • Loading branch information
DawoudSheraz authored Dec 27, 2023
1 parent 5ae20e6 commit 88a3617
Show file tree
Hide file tree
Showing 11 changed files with 102 additions and 79 deletions.
27 changes: 13 additions & 14 deletions course_discovery/apps/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1275,7 +1275,7 @@ class Meta(AbstractLocationRestrictionSerializer.Meta):
class CourseSerializer(TaggitSerializer, MinimalCourseSerializer):
"""Serializer for the ``Course`` model."""
level_type = SlugRelatedTranslatableField(required=False, allow_null=True, slug_field='name_t',
queryset=LevelType.objects.all())
queryset=LevelTypeSerializer.prefetch_queryset(LevelType.objects.all()))
subjects = SlugRelatedFieldWithReadSerializer(slug_field='slug', required=False, many=True,
queryset=SubjectSerializer.prefetch_queryset(),
read_serializer=SubjectSerializer())
Expand Down Expand Up @@ -1307,20 +1307,14 @@ class CourseSerializer(TaggitSerializer, MinimalCourseSerializer):
geolocation = GeoLocationSerializer(required=False, allow_null=True)
location_restriction = CourseLocationRestrictionSerializer(required=False)
in_year_value = ProductValueSerializer(required=False)
product_source = serializers.SlugRelatedField(required=False, slug_field='slug', queryset=Source.objects.all())
product_source = SlugRelatedFieldWithReadSerializer(
required=False, slug_field='slug', queryset=Source.objects.all(),
read_serializer=SourceSerializer()
)
watchers = serializers.ListField(
child=serializers.EmailField(), allow_empty=True, allow_null=True, required=False
)

def to_representation(self, instance):
"""
Conversion of the source slug to the source serializer data
"""
representation = super().to_representation(instance)
if representation.get('product_source'):
representation['product_source'] = SourceSerializer(Source.objects.get(slug=representation['product_source'])).data # pylint: disable=line-too-long
return representation

def get_organization_logo_override_url(self, obj):
logo_image_override = getattr(obj, 'organization_logo_override', None)
if logo_image_override:
Expand Down Expand Up @@ -1667,14 +1661,19 @@ def prefetch_queryset(cls, partner, queryset=None, course_runs=None):
filtered CourseRun queryset.
"""
queryset = queryset if queryset is not None else Course.objects.filter(partner=partner)

return queryset.select_related('level_type', 'video', 'partner').prefetch_related(
return queryset.select_related(
'type', '_official_version', 'level_type', 'video', 'partner', 'additional_metadata',
'location_restriction', 'in_year_value', 'product_source', 'canonical_course_run'
).prefetch_related(
'expected_learning_items',
'collaborators',
'prerequisites',
'subjects',
'subjects__translations',
'url_slug_history',
'topics',
'editors',
'url_redirects',
'level_type__translations',
cls.prefetch_course_runs(CourseRunSerializer, course_runs),
Prefetch('authoring_organizations', queryset=OrganizationSerializer.prefetch_queryset(partner)),
Prefetch('sponsoring_organizations', queryset=OrganizationSerializer.prefetch_queryset(partner)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,8 @@ def test_dtd_with_valid_data(self):
assert response.status_code == 200

filename = abspath(join(dirname(dirname(__file__)), 'affiliate_window_product_feed.1.4.dtd'))
dtd = etree.DTD(open(filename)) # lint-amnesty, pylint: disable=consider-using-with,c-extension-no-member
root = etree.XML(response.content) # pylint: disable=c-extension-no-member
dtd = etree.DTD(open(filename)) # lint-amnesty, pylint: disable=consider-using-with
root = etree.XML(response.content)
assert dtd.validate(root)

def test_permissions(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ def test_courses(self, state):
# to be included.
filtered_course_run = CourseRunFactory(course=course)

with self.assertNumQueries(48, threshold=3):
with self.assertNumQueries(38, threshold=3):
response = self.client.get(url)

assert response.status_code == 200
Expand Down
16 changes: 8 additions & 8 deletions course_discovery/apps/api/v1/tests/test_views/test_courses.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def test_get(self):
""" Verify the endpoint returns the details for a single course. """
url = reverse('api:v1:course-detail', kwargs={'key': self.course.key})

with self.assertNumQueries(48, threshold=3):
with self.assertNumQueries(43, threshold=3):
response = self.client.get(url)
assert response.status_code == 200
assert response.data == self.serialize_course(self.course)
Expand All @@ -112,7 +112,7 @@ def test_get_uuid(self):
""" Verify the endpoint returns the details for a single course with UUID. """
url = reverse('api:v1:course-detail', kwargs={'key': self.course.uuid})

with self.assertNumQueries(47):
with self.assertNumQueries(44):
response = self.client.get(url)
assert response.status_code == 200
assert response.data == self.serialize_course(self.course)
Expand All @@ -121,7 +121,7 @@ def test_get_exclude_deleted_programs(self):
""" Verify the endpoint returns no deleted associated programs """
ProgramFactory(courses=[self.course], status=ProgramStatus.Deleted)
url = reverse('api:v1:course-detail', kwargs={'key': self.course.key})
with self.assertNumQueries(47):
with self.assertNumQueries(43):
response = self.client.get(url)
assert response.status_code == 200
assert response.data.get('programs') == []
Expand All @@ -134,7 +134,7 @@ def test_get_include_deleted_programs(self):
ProgramFactory(courses=[self.course], status=ProgramStatus.Deleted)
url = reverse('api:v1:course-detail', kwargs={'key': self.course.key})
url += '?include_deleted_programs=1'
with self.assertNumQueries(50):
with self.assertNumQueries(47):
response = self.client.get(url)
assert response.status_code == 200
assert response.data == self.serialize_course(self.course, extra_context={'include_deleted_programs': True})
Expand Down Expand Up @@ -282,7 +282,7 @@ def test_list(self):
""" Verify the endpoint returns a list of all courses. """
url = reverse('api:v1:course-list')

with self.assertNumQueries(35, threshold=3):
with self.assertNumQueries(28, threshold=3):
response = self.client.get(url)
assert response.status_code == 200
self.assertListEqual(
Expand All @@ -300,7 +300,7 @@ def test_list_query(self):

# Known to be flaky prior to the addition of tearDown()
# and logout() code which is the same number of additional queries
with self.assertNumQueries(52, threshold=3):
with self.assertNumQueries(40, threshold=3):
response = self.client.get(url)
self.assertListEqual(response.data['results'], self.serialize_course(courses, many=True))

Expand All @@ -310,7 +310,7 @@ def test_list_key_filter(self):
keys = ','.join([course.key for course in courses])
url = '{root}?{params}'.format(root=reverse('api:v1:course-list'), params=urlencode({'keys': keys}))

with self.assertNumQueries(51, threshold=3):
with self.assertNumQueries(39, threshold=3):
response = self.client.get(url)
self.assertListEqual(response.data['results'], self.serialize_course(courses, many=True))

Expand All @@ -322,7 +322,7 @@ def test_list_uuid_filter(self):
# Increasing threshold because Course skill fetch SQL queries are executed twice
# on CI. Listing returns skill details and skill names as two separate fields.
# TODO: Figure out why the cache behavior is not working as expected on CI.
with self.assertNumQueries(51, threshold=5):
with self.assertNumQueries(40, threshold=5):
response = self.client.get(url)
self.assertListEqual(response.data['results'], self.serialize_course(courses, many=True))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,14 +176,14 @@ def ingest(self): # pylint: disable=too-many-statements
course_run, is_course_run_created = self._get_or_create_course_run(
row, course, course_type, course_run_type.uuid
)
except Exception as exc:
except Exception as exc: # pylint: disable=broad-except
logger.exception(exc)
continue
else:
logger.info("Course key {} could not be found in database, creating the course.".format(course_key)) # lint-amnesty, pylint: disable=logging-format-interpolation
try:
_ = self._create_course(row, course_type, course_run_type.uuid)
except Exception as exc:
except Exception as exc: # pylint: disable=broad-except
exception_message = exc
if hasattr(exc, 'response'):
exception_message = exc.response.content.decode('utf-8')
Expand Down Expand Up @@ -316,7 +316,7 @@ def _get_or_create_course_run(self, data, course, course_type, course_run_type_u
last_run = course_runs.last()
_ = self._create_course_run(data, course, course_type, course_run_type_uuid, last_run.key)
is_course_run_created = True
except Exception as exc: # pylint: disable=broad-except
except Exception as exc:
exception_message = exc
if hasattr(exc, 'response'):
exception_message = exc.response.content.decode('utf-8')
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from django.core.files.uploadedfile import SimpleUploadedFile
from django.core.management import call_command
from django.test import TestCase
from edx_django_utils.cache import RequestCache
from edx_toggles.toggles.testutils import override_waffle_switch
from slugify import slugify
from testfixtures import LogCapture
Expand Down Expand Up @@ -50,6 +51,7 @@ def setUp(self):
product_source=self.product_source, partner=partner, title='test_course'
)
self.course3_non_draft.url_slug_history.add(course_url_slug)
RequestCache("active_url_cache").clear()
self.csv_header = 'course_uuids\n'
self.csv_file_content = self.csv_header + str(self.course1.uuid) + '\n' + str(self.course2.uuid)
self.csv_file = SimpleUploadedFile(
Expand Down
46 changes: 33 additions & 13 deletions course_discovery/apps/course_metadata/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from django_elasticsearch_dsl.registries import registry
from django_extensions.db.fields import AutoSlugField
from django_extensions.db.models import TimeStampedModel
from edx_django_utils.cache import RequestCache, get_cache_key
from elasticsearch.exceptions import RequestError
from elasticsearch_dsl.query import Q as ESDSLQ
from localflavor.us.us_states import CONTIGUOUS_STATES
Expand Down Expand Up @@ -58,8 +59,9 @@
IS_SUBDIRECTORY_SLUG_FORMAT_FOR_EXEC_ED_ENABLED
)
from course_discovery.apps.course_metadata.utils import (
UploadToFieldNamePath, clean_query, custom_render_variations, get_slug_for_course, is_ocm_course,
push_to_ecommerce_for_course_run, push_tracks_to_lms_for_course_run, set_official_state, subtract_deadline_delta
UploadToFieldNamePath, clean_query, clear_slug_request_cache_for_course, custom_render_variations,
get_slug_for_course, is_ocm_course, push_to_ecommerce_for_course_run, push_tracks_to_lms_for_course_run,
set_official_state, subtract_deadline_delta
)
from course_discovery.apps.ietf_language_tags.models import LanguageTag
from course_discovery.apps.publisher.utils import VALID_CHARS_IN_COURSE_NUM_AND_ORG_KEY
Expand Down Expand Up @@ -1587,11 +1589,22 @@ def active_url_slug(self):
""" Official rows just return whatever slug is active, draft rows will first look for an associated active
slug and, if they fail to find one, take the slug associated with the official course that has
is_active_on_draft: True."""
active_url = self.url_slug_history.filter(is_active=True).first()
active_url_cache = RequestCache("active_url_cache")
cache_key = get_cache_key(course_uuid=self.uuid, draft=self.draft)
cached_active_url = active_url_cache.get_cached_response(cache_key)
if cached_active_url.is_found:
return cached_active_url.value

active_url = CourseUrlSlug.objects.filter(course=self, is_active=True).first()

if not active_url and self.draft and self.official_version:
# current draft url slug has already been published at least once, so get it from the official course
active_url = self.official_version.url_slug_history.filter(is_active_on_draft=True).first()
return getattr(active_url, 'url_slug', None)
active_url = CourseUrlSlug.objects.filter(course=self.official_version, is_active_on_draft=True).first()

url_slug = getattr(active_url, 'url_slug', None)
if url_slug:
active_url_cache.set(cache_key, url_slug)
return url_slug

def get_course_url_path(self):
"""
Expand Down Expand Up @@ -1684,15 +1697,19 @@ def course_run_statuses(self):
Returns all unique course run status values inside this course.
Note that it skips hidden courses - this list is typically used for presentational purposes.
The code performs the filtering on Python level instead of ORM/SQL because filtering on course_runs
invalidates the prefetch on API level.
"""
statuses = []
statuses = set()
now = datetime.datetime.now(pytz.UTC)
runs = self.course_runs.exclude(hidden=True)
if runs.filter(status=CourseRunStatus.Unpublished, end__lt=now).exists():
statuses = ['archived']
runs = runs.exclude(status=CourseRunStatus.Unpublished, end__lt=now)
statuses += list(runs.values_list('status', flat=True).distinct().order_by('status'))
return statuses
for course_run in self.course_runs.all():
if course_run.hidden:
continue
if course_run.end and course_run.end < now and course_run.status == CourseRunStatus.Unpublished:
statuses.add('archived')
else:
statuses.add(course_run.status)
return sorted(list(statuses))

def unpublish_inactive_runs(self, published_runs=None):
"""
Expand Down Expand Up @@ -1767,8 +1784,11 @@ def set_active_url_slug(self, slug):
logger.info('The current slug is {}; The slug to be set is {}; Current course is a draft: {}' # lint-amnesty, pylint: disable=logging-format-interpolation
.format(self.url_slug, slug, self.draft))

# There are too many branches in this method, and it is ok to clear cache than to serve stale data.
clear_slug_request_cache_for_course(self.uuid)

if slug:
# case 0: if slug is already in use with another, rasie an IntegrityError
# case 0: if slug is already in use with another, raise an IntegrityError
excluded_course = self.official_version if self.draft else self.draft_version
slug_already_in_use = CourseUrlSlug.objects.filter(url_slug__iexact=slug.lower()).exclude(
course__in=[self, excluded_course]).exists()
Expand Down
8 changes: 7 additions & 1 deletion course_discovery/apps/course_metadata/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from django.core.management import call_command
from django.db import IntegrityError, transaction
from django.test import TestCase, override_settings
from edx_django_utils.cache import RequestCache
from edx_toggles.toggles.testutils import override_waffle_switch
from freezegun import freeze_time
from slugify import slugify
Expand Down Expand Up @@ -562,6 +563,9 @@ def test_set_active_url_slug__draft_with_official_version(self):
non_draft_course = CourseFactory(draft_version=draft_course, title=draft_course.title, key=draft_course.key)
draft_course.url_slug_history.all().delete()
non_draft_course.url_slug_history.all().delete()
# Need to clear cache explicitly as marketing_url creation, that uses active_url_slug, sets the
# cache with factory data
RequestCache("active_url_cache").clear()
draft_previous_data_modified_timestamp = draft_course.data_modified_timestamp
non_draft_previous_data_modified_timestamp = non_draft_course.data_modified_timestamp
with LogCapture(LOGGER_PATH) as logger:
Expand Down Expand Up @@ -590,7 +594,9 @@ def test_set_active_url_slug__draft_with_official_version_matching_slug(self):
non_draft_course = CourseFactory(draft_version=draft_course, title=draft_course.title, key=draft_course.key)
draft_course.url_slug_history.all().delete()
non_draft_course.url_slug_history.all().delete()

# Need to clear cache explicitly as marketing_url creation, that uses active_url_slug, sets the
# cache with factory data
RequestCache("active_url_cache").clear()
CourseUrlSlugFactory(course=draft_course, is_active=True, is_active_on_draft=True, url_slug='test-course')
CourseUrlSlugFactory(course=non_draft_course, is_active=True, is_active_on_draft=False, url_slug='slug1')
non_draft_slug_obj = CourseUrlSlugFactory(
Expand Down
Loading

0 comments on commit 88a3617

Please sign in to comment.