Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: show math in plain text in library cards #36055

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from rest_framework.exceptions import NotFound

from openedx.core.djangoapps.content.search.models import SearchAccess
from openedx.core.djangoapps.content.search.plain_text_math import process_mathjax
from openedx.core.djangoapps.content_libraries import api as lib_api
from openedx.core.djangoapps.content_tagging import api as tagging_api
from openedx.core.djangoapps.xblock import api as xblock_api
Expand Down Expand Up @@ -220,7 +221,7 @@ class implementation returns only:
# Generate description from the content
description = _get_description_from_block_content(block_type, content_data)
if description:
block_data[Fields.description] = description
block_data[Fields.description] = process_mathjax(description)

except Exception as err: # pylint: disable=broad-except
log.exception(f"Failed to process index_dictionary for {block.usage_key}: {err}")
Expand Down
103 changes: 103 additions & 0 deletions openedx/core/djangoapps/content/search/plain_text_math.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""
Helper class to convert mathjax equations to plain text.
"""

import re

import unicodeit


class PlainTextMath:
    """
    Converts mathjax equations to plain text using unicodeit and some preprocessing.

    The conversion of a single equation happens in three steps:

    1. every well-formed `\\frac{x}{y}` is rewritten as `(x/y)`,
    2. a set of literal and regex replacements strips commands that have no
       plain text meaning,
    3. the remaining LaTeX is handed to `unicodeit.replace`.
    """
    # One capture group per supported delimiter pair:
    # [mathjaxinline]..[/mathjaxinline], [mathjax]..[/mathjax], \(..\) and \[..\]
    equation_pattern = re.compile(
        r'\[mathjaxinline\](.*?)\[\/mathjaxinline\]|\[mathjax\](.*?)\[\/mathjax\]|\\\((.*?)\\\)|\\\[(.*?)\\\]'
    )
    eqn_replacements = (
        # just remove prefix `\`
        # NOTE: these are plain substring replacements, so they also strip the
        # backslash from commands that merely start with them (`\sinh` -> `sinh`).
        ("\\sin", "sin"),
        ("\\cos", "cos"),
        ("\\tan", "tan"),
        ("\\arcsin", "arcsin"),
        ("\\arccos", "arccos"),
        ("\\arctan", "arctan"),
        ("\\cot", "cot"),
        ("\\sec", "sec"),
        ("\\csc", "csc"),
    )
    regex_replacements = (
        # `\left` / `\right` are used for matching brackets in mathjax and are not
        # required in plain text. The lookahead keeps longer commands such as
        # `\leftarrow` or `\rightarrow` intact.
        (re.compile(r'\\(?:left|right)(?![A-Za-z])'), ""),
        (re.compile(r'\\mathbf{(.*?)}'), r"\1"),
    )

    @staticmethod
    def _closing_brace_index(text: str, start: int) -> int:
        """
        Finds the `}` that closes a brace group.

        Args:
            text: string being scanned
            start: index of the first character *after* the opening `{`

        Returns:
            Index of the matching `}` or -1 if the group is never closed.
        """
        depth = 0
        for index in range(start, len(text)):
            char = text[index]
            if char == "{":
                depth += 1
            elif char == "}":
                if depth == 0:
                    return index
                depth -= 1
        return -1

    def _fraction_handler(self, equation: str) -> str:
        """
        Converts `\\frac{x}{y}` to `(x/y)` while handling nested `{}`.

        For example: `\\frac{2}{\\sqrt{1+y}}` is converted to `(2/\\sqrt{1+y})`.

        Every well-formed fraction in the equation is converted, including
        fractions nested inside another fraction. Malformed fractions
        (unbalanced braces, missing denominator group) are left untouched.

        Args:
            equation: string

        Returns:
            String with `\\frac` replaced by normal `/` symbol.
        """
        prefix = "\\frac{"
        search_from = 0
        while True:
            start_index = equation.find(prefix, search_from)
            if start_index == -1:
                return equation

            numerator_start = start_index + len(prefix)
            numerator_end = self._closing_brace_index(equation, numerator_start)
            # The denominator group has to open right after the numerator closes.
            if numerator_end == -1 or equation[numerator_end + 1:numerator_end + 2] != "{":
                # Invalid `\frac` format, skip it and look for the next one.
                search_from = numerator_start
                continue

            denominator_start = numerator_end + 2
            denominator_end = self._closing_brace_index(equation, denominator_start)
            if denominator_end == -1:
                # Invalid `\frac` format, skip it and look for the next one.
                search_from = numerator_start
                continue

            numerator = equation[numerator_start:numerator_end]
            denominator = equation[denominator_start:denominator_end]
            # Now re-create the equation with `(numerator / denominator)`
            equation = (
                equation[:start_index]
                + f"({numerator}/{denominator})"
                + equation[denominator_end + 1:]
            )
            # Rescan from the replacement so fractions nested inside the
            # numerator or denominator are converted as well.
            search_from = start_index

    def _handle_replacements(self, equation: str) -> str:
        """
        Makes a bunch of replacements in equation string.

        Literal replacements from `eqn_replacements` are applied first, followed
        by the compiled patterns in `regex_replacements`.
        """
        for needle, replacement in self.eqn_replacements:
            equation = equation.replace(needle, replacement)
        for pattern, replacement in self.regex_replacements:
            equation = pattern.sub(replacement, equation)
        return equation

    def run(self, eqn_matches: re.Match) -> str:
        """
        Takes re.Match object and runs conversion process on each match group.

        Only one alternative of `equation_pattern` can match at a time, so the
        first non-empty group is the equation body. An equation with an empty
        body is replaced by an empty string.
        """
        for group in eqn_matches.groups():
            if group:
                group = self._fraction_handler(group)
                group = self._handle_replacements(group)
                return unicodeit.replace(group)
        # `re.sub` treats a `None` replacement as empty as well; returning a real
        # string keeps the declared return type honest.
        return ""


# Single shared converter instance used by `process_mathjax`.
processor = PlainTextMath()


def process_mathjax(content: str) -> str:
    """
    Replaces every mathjax equation in `content` with its plain text form.

    Each match of `processor.equation_pattern` is passed to `processor.run`
    and substituted by the value it returns; text outside of the equation
    delimiters is left as is.

    Args:
        content: string that may contain mathjax equations

    Returns:
        String with the equations converted to plain text.
    """
    return processor.equation_pattern.sub(processor.run, content)
78 changes: 78 additions & 0 deletions openedx/core/djangoapps/content/search/tests/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,3 +477,81 @@ def test_collection_with_published_library(self):
"num_children": 1
}
}

def test_mathjax_plain_text_conversion_for_search(self):
    """
    Test how an HTML block with mathjax equations gets converted to plain text in search description.

    The block content is a list of labelled equations separated by `|||`; the
    indexed description is split on the same separator and every segment is
    compared against its expected plain text form.
    """
    # pylint: disable=line-too-long
    block = BlockFactory.create(
        parent_location=self.toy_course.location,
        category="html",
        display_name="Non-default HTML Block",
        editor="raw",
        use_latex_compiler=True,
        data=(
            "Simple addition: \\( 2 + 3 \\) |||"
            " Simple subtraction: \\( 5 - 2 \\) |||"
            " Simple multiplication: \\( 4 * 6 \\) |||"
            " Simple division: \\( 8 / 2 \\) |||"
            " Mixed arithmetic: \\( 2 + 3 4 \\) |||"
            " Simple exponentiation: \\[ 2^3 \\] |||"
            " Root extraction: \\[ 16^{1/2} \\] |||"
            " Exponent with multiple terms: \\[ (2 + 3)^2 \\] |||"
            " Nested exponents: \\[ 2^(3^2) \\] |||"
            " Mixed roots: \\[ 8^{1/2} 3^2 \\] |||"
            " Simple fraction: [mathjaxinline] 3/4 [/mathjaxinline] |||"
            " Decimal to fraction conversion: [mathjaxinline] 0.75 = 3/4 [/mathjaxinline] |||"
            " Mixed fractions: [mathjaxinline] 1 1/2 = 3/2 [/mathjaxinline] |||"
            " Converting decimals to mixed fractions: [mathjaxinline] 2.5 = 5/2 [/mathjaxinline] |||"
            " Sine, cosine, and tangent: [mathjaxinline] \\sin(x) [/mathjaxinline] [mathjaxinline] \\cos(x) [/mathjaxinline] [mathjaxinline] \\tan(x) [/mathjaxinline] |||"
            " Trig identities: [mathjaxinline] \\sin(x + y) = \\sin(x) \\cos(y) + \\cos(x) \\sin(y) [/mathjaxinline] |||"
            " Hyperbolic trig functions: [mathjaxinline] \\sinh(x) [/mathjaxinline] [mathjaxinline] \\cosh(x) [/mathjaxinline] |||"
            " Simple derivative: [mathjax] f(x) = x^2, f'(x) = 2x [/mathjax] |||"
            " Double integral: [mathjax] int\\int (x + y) dxdy [/mathjax] |||"
            " Partial derivatives: [mathjax] f(x,y) = xy, \\frac{\\partial f}{\\partial x} = y [/mathjax] [mathjax] \\frac{\\partial f}{\\partial y} = x [/mathjax] |||"
            " Mean and standard deviation: [mathjax] mu = 2, \\sigma = 1 [/mathjax] |||"
            " Binomial probability: [mathjax] P(X = k) = (\\binom{n}{k} p^k (1-p)^{n-k}) [/mathjax] |||"
            " Gaussian distribution: [mathjax] N(\\mu, \\sigma^2) [/mathjax] |||"
            " Greek letters: [mathjaxinline] \\alpha [/mathjaxinline] [mathjaxinline] \\beta [/mathjaxinline] [mathjaxinline] \\gamma [/mathjaxinline] |||"
            " Subscripted variables: [mathjaxinline] x_i [/mathjaxinline] [mathjaxinline] y_j [/mathjaxinline] |||"
            " Superscripted variables: [mathjaxinline] x^{i} [/mathjaxinline] |||"
            " Not supported: \\( \\begin{bmatrix} 1 & 0 \\ 0 & 1 \\end{bmatrix} = I \\)"
        ),
    )
    # pylint: enable=line-too-long
    doc = {}
    doc.update(searchable_doc_for_course_block(block))
    doc.update(searchable_doc_tags(block.usage_key))
    expected_equations = [
        'Simple addition: 2 + 3',
        'Simple subtraction: 5 − 2',
        'Simple multiplication: 4 * 6',
        'Simple division: 8 / 2',
        'Mixed arithmetic: 2 + 3 4',
        'Simple exponentiation: 2³',
        'Root extraction: 16¹^/²',
        'Exponent with multiple terms: (2 + 3)²',
        'Nested exponents: 2⁽3²)',
        'Mixed roots: 8¹^/² 3²',
        'Simple fraction: 3/4',
        'Decimal to fraction conversion: 0.75 = 3/4',
        'Mixed fractions: 1 1/2 = 3/2',
        'Converting decimals to mixed fractions: 2.5 = 5/2',
        'Sine, cosine, and tangent: sin(x) cos(x) tan(x)',
        'Trig identities: sin(x + y) = sin(x) cos(y) + cos(x) sin(y)',
        'Hyperbolic trig functions: sinh(x) cosh(x)',
        "Simple derivative: f(x) = x², f'(x) = 2x",
        'Double integral: int∫ (x + y) dxdy',
        'Partial derivatives: f(x,y) = xy, (∂ f/∂ x) = y (∂ f/∂ y) = x',
        'Mean and standard deviation: mu = 2, σ = 1',
        'Binomial probability: P(X = k) = (\\binom{n}{k} pᵏ (1−p)ⁿ⁻ᵏ)',
        'Gaussian distribution: N(μ, σ²)',
        'Greek letters: α β γ',
        'Subscripted variables: xᵢ yⱼ',
        'Superscripted variables: xⁱ',
        'Not supported: \\begin{bmatrix} 1 & 0 \\ 0 & 1 \\end{bmatrix} = I',
    ]
    eqns = doc['description'].split('|||')
    # Guard against a truncated or partially missing description: without this
    # check the loop below would silently skip the equations that are absent.
    assert len(eqns) == len(expected_equations)
    for eqn, expected in zip(eqns, expected_equations):
        assert eqn.strip() == expected
2 changes: 2 additions & 0 deletions requirements/edx/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1235,6 +1235,8 @@ unicodecsv==0.14.1
# via
# -r requirements/edx/kernel.in
# edx-enterprise
unicodeit==0.7.5
# via -r requirements/edx/kernel.in
uritemplate==4.1.1
# via
# drf-spectacular
Expand Down
4 changes: 4 additions & 0 deletions requirements/edx/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2178,6 +2178,10 @@ unicodecsv==0.14.1
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
# edx-enterprise
unicodeit==0.7.5
# via
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
unidiff==0.7.5
# via -r requirements/edx/testing.txt
uritemplate==4.1.1
Expand Down
2 changes: 2 additions & 0 deletions requirements/edx/doc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1530,6 +1530,8 @@ unicodecsv==0.14.1
# via
# -r requirements/edx/base.txt
# edx-enterprise
unicodeit==0.7.5
# via -r requirements/edx/base.txt
uritemplate==4.1.1
# via
# -r requirements/edx/base.txt
Expand Down
1 change: 1 addition & 0 deletions requirements/edx/kernel.in
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,4 @@ webob
web-fragments # Provides the ability to render fragments of web pages
XBlock[django] # Courseware component architecture
xss-utils # https://github.com/openedx/edx-platform/pull/20633 Fix XSS via Translations
unicodeit # Converts mathjax equation to plain text by using unicode symbols
2 changes: 2 additions & 0 deletions requirements/edx/testing.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1612,6 +1612,8 @@ unicodecsv==0.14.1
# via
# -r requirements/edx/base.txt
# edx-enterprise
unicodeit==0.7.5
# via -r requirements/edx/base.txt
unidiff==0.7.5
# via -r requirements/edx/testing.in
uritemplate==4.1.1
Expand Down
Loading