Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for permissions cache and localization issues #489

Merged
merged 2 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,10 @@ def get_text_for_translations(root_path):
for file in files:
if not file.endswith(".yaml"):
continue

path = os.path.join(root, file)
print(f"Reading {path}")
with open(path, 'r') as asset_file:
asset_str = asset_file.read().replace("{{", "'")
asset_str = asset_str.replace("}}", "'")
asset_str = asset_file.read()

asset = yaml.safe_load(asset_str)
strings.extend(mark_text_for_translation(asset))
Expand Down
12 changes: 12 additions & 0 deletions tutoraspects/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@
"da",
"de_DE",
"el",
"en",
"es_419",
"es_ES",
"fr_CA",
Expand All @@ -333,6 +334,17 @@
],
),
("SUPERSET_EXTRA_JINJA_FILTERS", {}),
# This controls the cache time of the can_view_courses
# wrapper, which controls the course-based permissions.
# This includes the user roles and course list. This
# does not get cleared on login, and so should be kept
# short since mostly most of the savings comes from the
# course cache anyway.
("SUPERSET_USER_PERMISSIONS_CACHE_TIMEOUT", 120),
# This controls the cache time of the user's course list
# only, limiting the number of LMS calls since they are
# rate limited. This can be cleared by logging back in.
("SUPERSET_USER_COURSES_CACHE_TIMEOUT", 300),
("SUPERSET_BLOCK_STUDENT_ACCESS", True),
# This setting allows Superset to run behind a reverse proxy in HTTPS and
# redirect to the correct http/s based on the headers sent from the proxy.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,13 @@
NO_COURSES = "1 = 0"


def can_view_courses(username, field_name="course_id"):
def can_view_courses(username, field_name="course_id", **kwargs):
"""
Returns SQL WHERE clause which restricts access to the courses the current user has staff access to.
Returns SQL WHERE clause which restricts access to the courses the current user has
staff access to.

We accept kwargs for optional caching args, since this is memoized in
can_view_courses_wrapper.
"""
user = security_manager.get_user_by_username(username)
if user:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from authlib.common.urls import add_params_to_qs, add_params_to_uri
from authlib.integrations.flask_client import OAuthError
from flask import current_app, session
from superset.extensions import cache_manager
from superset.security import SupersetSecurityManager

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -79,6 +80,9 @@ def oauth_user_info(self, provider, response=None):
user_profile.get("preferred_username"), locale_preference
)

log.info(f"User roles for {user_profile['preferred_username']}:"
f" {user_roles}")

return {
"name": user_profile["name"],
"email": user_profile["email"],
Expand Down Expand Up @@ -161,12 +165,15 @@ def _get_user_roles(self, username, locale):
decoded_access_token = self.decoded_user_info()

if decoded_access_token.get("superuser", False):
return ["admin", "admin-{locale}"]
return ["admin", f"admin-{locale}"]
elif decoded_access_token.get("administrator", False):
return ["alpha", "operator", "operator-{locale}"]
return ["alpha", "operator", f"operator-{locale}"]
else:
# User has to have staff access to one or more courses to view any content here.
courses = self.get_courses(username)
# User has to have staff access to one or more courses to view any content
# here. Since this is only called on login, we take the opportunity
# to force refresh the user's courses from LMS. This allows them to bust
# the course permissions cache if necessary by logging out and back in.
courses = self.get_courses(username, force=True)
if courses:
return ["instructor", f"instructor-{locale}"]
else:
Expand All @@ -188,10 +195,24 @@ def extra_get_user_roles(self, username, decoded_access_token):
{{patch("superset-sso-assignment-rules") | indent(8)}}
return None

def get_courses(self, username, permission="staff", next_url=None):
def get_courses(self, username, permission="staff", next_url=None, force=False):
"""
Returns the list of courses the current user has access to.

Calls itself recursively if the response is paginated.
Force will cause a force refresh of the cache, we do this on login to make
sure that the user always has an ability to update this data.
"""
cache = cache_manager.cache
cache_key = f"{username}+{permission}"

# Only return from the cache when we're at the top level
# (not in recursion)
if not next_url and not force:
obj = cache.get(cache_key)
if obj is not None:
return obj

courses = []
provider = session.get("oauth_provider")
oauth_remote = self.oauth_remotes.get(provider)
Expand All @@ -209,7 +230,9 @@ def get_courses(self, username, permission="staff", next_url=None):
username=username, permission=permission
)
url = next_url or courses_url
response = oauth_remote.get(url, token=token).json()
resp = oauth_remote.get(url, token=token)
resp.raise_for_status()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously we ignored errors here which limited logging things like the LMS 429 rate limit errors, this will now raise them for easier debugging.

response = resp.json()

for course in response.get("results", []):
course_id = course.get("course_id")
Expand All @@ -224,4 +247,10 @@ def get_courses(self, username, permission="staff", next_url=None):
for course_id in next_courses:
courses.append(course_id)

log.info(f"Courses for {username}: {courses}")

# Only cache the top level (non-recursive) response
if not next_url:
cache.set(cache_key, courses, timeout=300)

return courses
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,9 @@ def get_env_variable(var_name: str, default: Optional[str] = None) -> str:

REDIS_HOST = get_env_variable("REDIS_HOST")
REDIS_PORT = get_env_variable("REDIS_PORT")
REDIS_CELERY_DB = get_env_variable("REDIS_CELERY_DB", "0")
REDIS_RESULTS_DB = get_env_variable("REDIS_RESULTS_DB", "1")
REDIS_CELERY_DB = get_env_variable("REDIS_CELERY_DB", "3")
REDIS_RESULTS_DB = get_env_variable("REDIS_RESULTS_DB", "4")
REDIS_CACHE_DB = get_env_variable("REDIS_CACHE_DB", "5")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moving these to some other databases in redis so they are easier to identify. Previously they shared namespace with LMS which made debugging challenging and increased the possibility of name collisions.

Ian2012 marked this conversation as resolved.
Show resolved Hide resolved
REDIS_PASSWORD = get_env_variable("REDIS_PASSWORD", "")

RESULTS_BACKEND = RedisCache(host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD, db=REDIS_RESULTS_DB, key_prefix='superset_results')
Expand All @@ -88,9 +89,9 @@ def get_env_variable(var_name: str, default: Optional[str] = None) -> str:

# Cache for dashboard filter state
FILTER_STATE_CACHE_CONFIG: CacheConfig = {
"CACHE_DEFAULT_TIMEOUT": int(timedelta(days=90).total_seconds()),
"CACHE_DEFAULT_TIMEOUT": 1200,
# should the timeout be reset when retrieving a cached value
"REFRESH_TIMEOUT_ON_RETRIEVAL": True,
"REFRESH_TIMEOUT_ON_RETRIEVAL": False,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bumping down the time and forcing these to refresh now, there could be some confusing artifacts when the filters got cached but a user's permissions had changed. This cuts down the possible length of time for that state to 20 mins. Artifacts were things like problem and video names appearing selected in the filters when the user no longer had access to them, though the charts do the right thing and do not display the data.

**CACHE_CONFIG,
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,13 @@
from openedx_jinja_filters import *

def can_view_courses_wrapper(*args, **kwargs):
"""
Wraps the can_view_courses call in a cache for performance.
"""
from superset.utils.cache import memoized_func
from superset.extensions import cache_manager

return memoized_func(key="{username}", cache=cache_manager.cache)(can_view_courses)(*args, **kwargs)
kwargs["cache_timeout"] = {{ SUPERSET_USER_PERMISSIONS_CACHE_TIMEOUT }}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was the actual bug in caching user permissions. Superset docstring says the memoized_func defaults to 300 secs, but it is actually 0 (indefinite). Passing in an explicit timeout fixes it.

return memoized_func()(can_view_courses)(*args, **kwargs)


JINJA_CONTEXT_ADDONS = {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
_file_name: Instructor_Dashboard.yaml
_roles:
- {{ SUPERSET_ROLES_MAPPING.instructor }}
- "{{ SUPERSET_ROLES_MAPPING.instructor }}"
css: ''
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the localization process to work now we need to wrap all curly braces in double quotes. This still needs testing on the translations_pull side once the existing translations are updated, but I think it will work.

dashboard_title: Instructor Dashboard
description: null
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
_file_name: Operator_Dashboard.yaml
_roles:
- {{ SUPERSET_ROLES_MAPPING.operator }}
- "{{ SUPERSET_ROLES_MAPPING.operator }}"
css: ''
dashboard_title: Operator Dashboard
description: null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ extra:
engine_params: {}
metadata_params: {}
schemas_allowed_for_file_upload: []
sqlalchemy_uri: {{CLICKHOUSE_REPORT_SQLALCHEMY_URI}}
sqlalchemy_uri: "{{CLICKHOUSE_REPORT_SQLALCHEMY_URI}}"
uuid: 21174b6c-4d40-4958-8161-d6c3cf5e77b6
version: 1.0.0
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,9 @@ metrics:
warning_text: null
offset: 0
params: null
schema: {{ ASPECTS_EVENT_SINK_DATABASE }}
schema: "{{ ASPECTS_EVENT_SINK_DATABASE }}"
sql: null
table_name: {{ ASPECTS_EVENT_SINK_NODES_TABLE }}
table_name: "{{ ASPECTS_EVENT_SINK_NODES_TABLE }}"
template_params: null
uuid: 1b73d066-fd6c-411d-a99d-fc585f9474b1
version: 1.0.0
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ metrics:
normalize_columns: true
offset: 0
params: null
schema: {{ ASPECTS_EVENT_SINK_DATABASE }}
schema: "{{ ASPECTS_EVENT_SINK_DATABASE }}"
sql: null
table_name: course_names
template_params: null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ metrics:
normalize_columns: true
offset: 0
params: null
schema: {{ ASPECTS_EVENT_SINK_DATABASE }}
schema: "{{ ASPECTS_EVENT_SINK_DATABASE }}"
sql: null
table_name: course_overviews
template_params: null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ metrics:
normalize_columns: true
offset: 0
params: null
schema: {{ASPECTS_XAPI_DATABASE}}
schema: "{{ASPECTS_XAPI_DATABASE}}"
sql: ''
table_name: xapi_events_all_parsed
template_params: null
Expand Down