Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(thumbnail cache): Enabling force parameter on screenshot/thumbnail cache #31757

Merged
merged 8 commits into from
Jan 31, 2025
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 75 additions & 45 deletions superset/charts/api.py
Original file line number Diff line number Diff line change
@@ -30,7 +30,7 @@
from werkzeug.wrappers import Response as WerkzeugResponse
from werkzeug.wsgi import FileWrapper

from superset import app, is_feature_enabled, thumbnail_cache
from superset import app, is_feature_enabled
from superset.charts.filters import (
ChartAllTextFilter,
ChartCertifiedFilter,
@@ -84,7 +84,12 @@
from superset.tasks.thumbnails import cache_chart_thumbnail
from superset.tasks.utils import get_current_user
from superset.utils import json
from superset.utils.screenshots import ChartScreenshot, DEFAULT_CHART_WINDOW_SIZE
from superset.utils.screenshots import (
ChartScreenshot,
DEFAULT_CHART_WINDOW_SIZE,
ScreenshotCachePayload,
StatusValues,
)
from superset.utils.urls import get_url_path
from superset.views.base_api import (
BaseSupersetModelRestApi,
@@ -564,8 +569,14 @@
schema:
$ref: '#/components/schemas/screenshot_query_schema'
responses:
200:
description: Chart async result
content:
application/json:
schema:
$ref: "#/components/schemas/ChartCacheScreenshotResponseSchema"
202:
description: Chart async result
description: Chart async task created
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry about the back and forth here, but this seems more appropriate:

Suggested change
description: Chart async task created
description: Chart screenshot task created

content:
application/json:
schema:
@@ -580,6 +591,7 @@
$ref: '#/components/responses/500'
"""
rison_dict = kwargs["rison"]
force = rison_dict.get("force")

Check warning on line 594 in superset/charts/api.py

Codecov / codecov/patch

superset/charts/api.py#L594

Added line #L594 was not covered by tests
window_size = rison_dict.get("window_size") or DEFAULT_CHART_WINDOW_SIZE

# Don't shrink the image if thumb_size is not specified
@@ -591,25 +603,45 @@

chart_url = get_url_path("Superset.slice", slice_id=chart.id)
screenshot_obj = ChartScreenshot(chart_url, chart.digest)
cache_key = screenshot_obj.cache_key(window_size, thumb_size)
cache_key = screenshot_obj.get_cache_key(window_size, thumb_size)
cache_payload = (

Check warning on line 607 in superset/charts/api.py

Codecov / codecov/patch

superset/charts/api.py#L606-L607

Added lines #L606 - L607 were not covered by tests
screenshot_obj.get_from_cache_key(cache_key) or ScreenshotCachePayload()
)
image_url = get_url_path(
"ChartRestApi.screenshot", pk=chart.id, digest=cache_key
)

def trigger_celery() -> WerkzeugResponse:
def build_response(status_code: int) -> WerkzeugResponse:
return self.response(

Check warning on line 615 in superset/charts/api.py

Codecov / codecov/patch

superset/charts/api.py#L614-L615

Added lines #L614 - L615 were not covered by tests
status_code,
cache_key=cache_key,
chart_url=chart_url,
image_url=image_url,
task_updated_at=cache_payload.get_timestamp(),
task_status=cache_payload.get_status(),
)

error_cache_ttl = config["THUMBNAIL_ERROR_CACHE_TTL"]
error_cache_expired = (

Check warning on line 625 in superset/charts/api.py

Codecov / codecov/patch

superset/charts/api.py#L624-L625

Added lines #L624 - L625 were not covered by tests
datetime.now()
- datetime.strptime(cache_payload.get_timestamp(), "%Y/%m/%d-%H:%M:%S")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

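Same here: parse the timestamp with `datetime.fromisoformat` instead of a hard-coded `strptime` format string.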

Suggested change
- datetime.strptime(cache_payload.get_timestamp(), "%Y/%m/%d-%H:%M:%S")
- datetime.fromisoformat(cache_payload.get_timestamp())

).total_seconds() > error_cache_ttl
if (

Check warning on line 629 in superset/charts/api.py

Codecov / codecov/patch

superset/charts/api.py#L629

Added line #L629 was not covered by tests
cache_payload.status == StatusValues.PENDING
or (cache_payload.status == StatusValues.ERROR and error_cache_expired)
or force
):
logger.info("Triggering screenshot ASYNC")
screenshot_obj.cache.set(cache_key, ScreenshotCachePayload())

Check warning on line 635 in superset/charts/api.py

Codecov / codecov/patch

superset/charts/api.py#L635

Added line #L635 was not covered by tests
cache_chart_thumbnail.delay(
current_user=get_current_user(),
chart_id=chart.id,
force=True,
window_size=window_size,
thumb_size=thumb_size,
force=force,
)
return self.response(
202, cache_key=cache_key, chart_url=chart_url, image_url=image_url
)

return trigger_celery()
return build_response(202)
return build_response(200)

Check warning on line 644 in superset/charts/api.py

Codecov / codecov/patch

superset/charts/api.py#L643-L644

Added lines #L643 - L644 were not covered by tests

@expose("/<pk>/screenshot/<digest>/", methods=("GET",))
@protect()
@@ -635,7 +667,7 @@
name: digest
responses:
200:
description: Chart thumbnail image
description: Chart screenshot image
content:
image/*:
schema:
@@ -652,16 +684,16 @@
"""
chart = self.datamodel.get(pk, self._base_filters)

# Making sure the chart still exists
if not chart:
return self.response_404()

# fetch the chart screenshot using the current user and cache if set
if img := ChartScreenshot.get_from_cache_key(thumbnail_cache, digest):
return Response(
FileWrapper(img), mimetype="image/png", direct_passthrough=True
)
# TODO: return an empty image
if cache_payload := ChartScreenshot.get_from_cache_key(digest):
if cache_payload.status == StatusValues.UPDATED:
return Response(

Check warning on line 692 in superset/charts/api.py

Codecov / codecov/patch

superset/charts/api.py#L690-L692

Added lines #L690 - L692 were not covered by tests
FileWrapper(cache_payload.get_image()),
mimetype="image/png",
direct_passthrough=True,
)
return self.response_404()

@expose("/<pk>/thumbnail/<digest>/", methods=("GET",))
@@ -685,9 +717,10 @@
type: integer
name: pk
- in: path
name: digest
description: A hex digest that makes this chart unique
schema:
type: string
name: digest
responses:
200:
description: Chart thumbnail image
@@ -712,44 +745,41 @@
return self.response_404()

current_user = get_current_user()
url = get_url_path("Superset.slice", slice_id=chart.id)
if kwargs["rison"].get("force", False):
logger.info(
"Triggering thumbnail compute (chart id: %s) ASYNC", str(chart.id)
)
cache_chart_thumbnail.delay(
current_user=current_user,
chart_id=chart.id,
force=True,
if chart.digest != digest:
self.incr_stats("redirect", self.thumbnail.__name__)
return redirect(
url_for(
f"{self.__class__.__name__}.thumbnail", pk=pk, digest=chart.digest
)
)
return self.response(202, message="OK Async")
# fetch the chart screenshot using the current user and cache if set
screenshot = ChartScreenshot(url, chart.digest).get_from_cache(
cache=thumbnail_cache
url = get_url_path("Superset.slice", slice_id=chart.id)
screenshot_obj = ChartScreenshot(url, chart.digest)
cache_key = screenshot_obj.get_cache_key()
cache_payload = (
screenshot_obj.get_from_cache_key(cache_key) or ScreenshotCachePayload()
)
# If not screenshot then send request to compute thumb to celery
if not screenshot:

if cache_payload.status in [StatusValues.PENDING, StatusValues.ERROR]:
self.incr_stats("async", self.thumbnail.__name__)
logger.info(
"Triggering thumbnail compute (chart id: %s) ASYNC", str(chart.id)
)
screenshot_obj.cache.set(cache_key, ScreenshotCachePayload())
cache_chart_thumbnail.delay(
current_user=current_user,
chart_id=chart.id,
force=True,
force=False,
)
return self.response(202, message="OK Async")
# If digests
if chart.digest != digest:
self.incr_stats("redirect", self.thumbnail.__name__)
return redirect(
url_for(
f"{self.__class__.__name__}.thumbnail", pk=pk, digest=chart.digest
)
return self.response(
202,
task_updated_at=cache_payload.get_timestamp(),
task_status=cache_payload.get_status(),
)
self.incr_stats("from_cache", self.thumbnail.__name__)
return Response(
FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True
FileWrapper(cache_payload.get_image()),
mimetype="image/png",
direct_passthrough=True,
)

@expose("/export/", methods=("GET",))
15 changes: 15 additions & 0 deletions superset/charts/schemas.py
Original file line number Diff line number Diff line change
@@ -304,6 +304,21 @@ class ChartCacheScreenshotResponseSchema(Schema):
image_url = fields.String(
metadata={"description": "The url to fetch the screenshot"}
)
task_status = fields.String(
metadata={"description": "The status of the async screenshot"}
)
task_updated_at = fields.String(
metadata={"description": "The timestamp of the last change in status"}
)


class ChartGetCachedScreenshotResponseSchema(Schema):
task_status = fields.String(
metadata={"description": "The status of the async screenshot"}
)
task_updated_at = fields.String(
metadata={"description": "The timestamp of the last change in status"}
)


class ChartDataColumnSchema(Schema):
2 changes: 2 additions & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
@@ -729,8 +729,10 @@ class D3TimeFormat(TypedDict, total=False):

THUMBNAIL_CACHE_CONFIG: CacheConfig = {
"CACHE_TYPE": "NullCache",
"CACHE_DEFAULT_TIMEOUT": int(timedelta(days=7).total_seconds()),
"CACHE_NO_NULL_WARNING": True,
}
THUMBNAIL_ERROR_CACHE_TTL = int(timedelta(days=1).total_seconds())

# Time before selenium times out after trying to locate an element on the page and wait
# for that element to load for a screenshot.
Loading