Skip to content

Commit

Permalink
some details in string_ops
Browse files Browse the repository at this point in the history
  • Loading branch information
fchapoton committed Jan 3, 2025
1 parent c9dd1e8 commit 3fc468c
Showing 1 changed file with 22 additions and 7 deletions.
29 changes: 22 additions & 7 deletions src/sage/monoids/string_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
#
# https://www.gnu.org/licenses/
# ****************************************************************************
from typing import Any

from sage.misc.lazy_import import lazy_import
from .string_monoid_element import StringMonoidElement

lazy_import('sage.rings.real_mpfr', 'RealField')

from .string_monoid_element import StringMonoidElement


def strip_encoding(S) -> str:
"""
Expand All @@ -27,7 +27,6 @@ def strip_encoding(S) -> str:
TESTS::
sage: S = "The cat in the hat."
sage: strip_encoding(44)
Traceback (most recent call last):
...
Expand All @@ -41,24 +40,40 @@ def strip_encoding(S) -> str:
def frequency_distribution(S, n=1, field=None):
    """
    The probability space of frequencies of n-character substrings of S.

    INPUT:

    - ``S`` -- string, ``StringMonoidElement``, list or tuple; a string
      (or string monoid element) is cut into its overlapping substrings
      of length ``n``, whereas the items of a list or tuple are counted
      as they are given

    - ``n`` -- integer (default: 1); the substring length, only used
      when ``S`` is a string or a string monoid element

    - ``field`` -- (default: ``RealField()``) the parent in which the
      frequencies are computed

    OUTPUT: a ``DiscreteProbabilitySpace`` built from the list of items,
    the dictionary of their relative frequencies, and ``field``

    EXAMPLES::

        sage: frequency_distribution('banana not a nana nor ananas', 2)
        Discrete probability space defined by {' a': 0.0740740740740741,
         ' n': 0.111111111111111,
         'a ': 0.111111111111111,
         'an': 0.185185185185185,
         'as': 0.0370370370370370,
         'ba': 0.0370370370370370,
         'na': 0.222222222222222,
         'no': 0.0740740740740741,
         'or': 0.0370370370370370,
         'ot': 0.0370370370370370,
         'r ': 0.0370370370370370,
         't ': 0.0370370370370370}

    TESTS::

        sage: frequency_distribution(44)
        Traceback (most recent call last):
        ...
        TypeError: Argument S (= 44) must be a string, list, or tuple.
    """
    from sage.probability.random_variable import DiscreteProbabilitySpace

    # Normalise every accepted input to a plain list of items to count.
    if isinstance(S, tuple):
        S = list(S)
    elif isinstance(S, (str, StringMonoidElement)):
        S = [S[i:i + n] for i in range(len(S) - n + 1)]
    if not isinstance(S, list):
        # The original message left its "%s" placeholder unformatted.
        raise TypeError(f"Argument S (= {S}) must be a string, list, or tuple.")

    if field is None:
        field = RealField()

    P: dict[str, Any] = {}
    # Each occurrence of an item contributes the same weight 1/len(S).
    eps = field.one() / len(S)
    for c in S:
        if c in P:
            P[c] += eps
        else:
            P[c] = eps
    return DiscreteProbabilitySpace(S, P, field)

Expand All @@ -80,7 +95,7 @@ def coincidence_index(S, n=1):
raise TypeError("Argument S (= %s) must be a string.")
S = strip_encoding(S)
N = len(S)-n+1
X = {}
X: dict[str, int] = {}
for i in range(N):
c = S[i:i+n]
if c in X:
Expand Down

0 comments on commit 3fc468c

Please sign in to comment.