Skip to content

Commit

Permalink
FIX: Open metadata with utf-8 encoding
Browse files Browse the repository at this point in the history
When calling 'open' without specifying the encoding, Python uses
`locale.getencoding()` on Python 3.11+ and
`locale.getpreferredencoding(False)` (with or without the `False`)
on Python 3.8 to determine which encoding to open the file with.

RMS in interactive mode sets this to ANSI_X3.4-1968 (an alias for
ASCII). This can cause reading the file to fail when non-ASCII
characters are present within it.
  • Loading branch information
mferrera committed Jan 28, 2025
1 parent da21732 commit ed26222
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/fmu/dataio/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def export_file(
serialized_json = json.dumps(obj)

if isinstance(file, Path):
with open(file, "w") as stream:
with open(file, "w", encoding="utf-8") as stream:
stream.write(serialized_json)
else:
file.write(serialized_json.encode("utf-8"))
Expand Down Expand Up @@ -220,7 +220,7 @@ def read_parameters_txt(pfile: Path | str) -> types.Parameters:

res: types.Parameters = {}

with open(pfile) as f:
with open(pfile, encoding="utf-8") as f:
for line in f:
line_parts = shlex.split(line)
if len(line_parts) == 2:
Expand Down Expand Up @@ -346,7 +346,7 @@ def read_metadata_from_file(filename: str | Path) -> dict:
metafilepath = Path(metafile)
if not metafilepath.exists():
raise OSError(f"Cannot find requested metafile: {metafile}")
with open(metafilepath) as stream:
with open(metafilepath, encoding="utf-8") as stream:
return yaml.safe_load(stream)


Expand Down
37 changes: 37 additions & 0 deletions tests/test_units/test_rms_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
interactive or from ERT. Hence the rootpath will be ../../
"""

import builtins
import logging
import os
import shutil
from copy import deepcopy
from pathlib import Path

import pandas as pd
Expand Down Expand Up @@ -616,6 +618,41 @@ def test_gridproperty_export_with_geometry(inside_rms_setup, grid, gridproperty)
assert "this_is_parent" in output


@inside_rms
def test_gridproperty_export_with_geometry_and_bad_character(
    inside_rms_setup, grid, gridproperty, monkeypatch
):
    """Ensure a non-ascii character in masterdata does not break metadata parsing.

    ``builtins.open`` is patched so that text-mode reads which do not specify an
    encoding fall back to ``ANSI_X3.4-1968`` (ascii). A grid is then exported, and
    a grid property is exported with that grid as its ``geometry``. The test
    passes as long as neither export raises.
    """
    original_open = builtins.open

    def open_with_ansi(file, mode="r", *args, **kwargs):
        """Delegate to the real ``open``, defaulting text reads to ascii."""
        # The extra positionals of open() are (buffering, encoding, ...), so a
        # second one means the caller already chose an encoding. Injecting the
        # keyword as well would raise "multiple values for argument 'encoding'".
        encoding_given = "encoding" in kwargs or len(args) >= 2
        if "r" in mode and "b" not in mode and not encoding_given:
            kwargs["encoding"] = "ANSI_X3.4-1968"
        return original_open(file, mode, *args, **kwargs)

    monkeypatch.setattr(builtins, "open", open_with_ansi)

    # Work on a copy so the non-ascii identifier does not leak into the fixture.
    cfg = deepcopy(inside_rms_setup["config"])
    cfg["masterdata"]["smda"]["field"][0]["identifier"] = "Drogøn"

    grd_edata = dataio.ExportData(
        config=cfg,
        name="geogrid",
        content={"property": {"is_discrete": False}},
    )
    outgrid = grd_edata.export(grid)

    # Will raise an exception if decoding fails
    dataio.ExportData(
        config=cfg,
        name="geogrid",
        content={"property": {"is_discrete": False}},
        geometry=outgrid,
    ).export(gridproperty)


# ======================================================================================
# Dataframe and PyArrow
# ======================================================================================
Expand Down

0 comments on commit ed26222

Please sign in to comment.