Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clib.Session.virtualfile_from_vectors: Now takes a sequence of vectors as its single argument (Passing multiple arguments will be unsupported in v0.16.0) #3522

Merged
merged 14 commits into from
Oct 22, 2024
Merged
51 changes: 31 additions & 20 deletions pygmt/clib/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -1330,7 +1330,7 @@ def open_virtual_file(self, family, geometry, direction, data):
return self.open_virtualfile(family, geometry, direction, data)

@contextlib.contextmanager
def virtualfile_from_vectors(self, *vectors):
def virtualfile_from_vectors(self, vectors, *args):
seisman marked this conversation as resolved.
Show resolved Hide resolved
"""
Store 1-D arrays as columns of a table inside a virtual file.

Expand Down Expand Up @@ -1372,13 +1372,24 @@ def virtualfile_from_vectors(self, *vectors):
>>> y = np.array([4, 5, 6])
>>> z = pd.Series([7, 8, 9])
>>> with Session() as ses:
... with ses.virtualfile_from_vectors(x, y, z) as fin:
... with ses.virtualfile_from_vectors((x, y, z)) as fin:
... # Send the output to a file so that we can read it
... with GMTTempFile() as fout:
... ses.call_module("info", [fin, f"->{fout.name}"])
... print(fout.read().strip())
<vector memory>: N = 3 <1/3> <4/6> <7/9>
"""
if len(args) > 0:
warnings.warn(
"Passing multiple arguments to Session.virtualfile_fro_vectors is "
seisman marked this conversation as resolved.
Show resolved Hide resolved
"deprecated since v0.14.0 and will be unsupported in v0.16.0. "
"Pass all vectors as a single tuple instead, e.g., "
"Use `with lib.virtualfile_from_vectors((x, y, z)) as vfile` "
"instead of `with lib.virtualfile_from_vectors(x, y, z) as vfile`.",
category=FutureWarning,
stacklevel=3,
)
vectors = (vectors, *args)
# Conversion to a C-contiguous array needs to be done here and not in
# put_vector or put_strings because we need to maintain a reference to
# the copy while it is being used by the C API. Otherwise, the array
Expand Down Expand Up @@ -1791,19 +1802,19 @@ def virtualfile_in( # noqa: PLR0912
"vectors": self.virtualfile_from_vectors,
}[kind]

# Ensure the data is an iterable (Python list or tuple).
# "_data" is the data that will be passed to the _virtualfile_from function.
# "_data" defaults to "data" but should be adjusted for some cases.
_data = data
match kind:
case "arg" | "file" | "geojson" | "grid" | "image" | "stringio":
Comment on lines +1798 to -1786
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe just put _data = data in the case _ below? I'd actually prefer if we make it more explicit like:

case "arg" | "file" | "geojson" | "grid" | "stringio":
    _data = data

So that if there's a new kind in the future, we know to add it explicitly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd actually prefer if we make it more explicit like:

case "arg" | "file" | "geojson" | "grid" | "stringio":
    _data = data

So that if there's a new kind in the future, we know to add it explicitly.

We can't do it like this, because we're currently writing case statements like case "image" if data.dtype != "uint8":, which only catches the case when kind="image" and data.dtype != "uint8".

We can refactor the codes to:

case "arg" | "file" | "geojson" | "grid" | "stringio":
    _data = data
case "image":
    _data = data
    if data.dtype != "uint8":
        # do something
case "matrix":
    _data = data
    if data.dtype.kind not in "iuf":
    # do something

or

case "arg" | "file" | "geojson" | "grid" | "stringio":
    _data = data
case "image" if data.dtype != "uint8":
    raise ...
case "matrix" if data.dtype.kind not in "iuf":
    ...
case _:
    _data = data # this is necessary to catch cases like kind="matrix" and dtype in "iuf"  

I still prefer the current codes, which set _data to data by default and the match-case statements only need to deal with exceptions.

So that if there's a new kind in the future, we know to add it explicitly.

If there is a new kind in the future, _data=data is still the default. If needs special handling and we forget to add it, then the related tests will fail anyway.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah ok, that makes sense, let's keep _data = data at the top then.

_data = (data,)
if kind == "image" and data.dtype != "uint8":
msg = (
f"Input image has dtype: {data.dtype} which is unsupported, "
"and may result in an incorrect output. Please recast image "
"to a uint8 dtype and/or scale to 0-255 range, e.g. "
"using a histogram equalization function like "
"skimage.exposure.equalize_hist."
)
warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
case "image" if data.dtype != "uint8":
msg = (
f"Input image has dtype: {data.dtype} which is unsupported, "
"and may result in an incorrect output. Please recast image "
"to a uint8 dtype and/or scale to 0-255 range, e.g. "
"using a histogram equalization function like "
"skimage.exposure.equalize_hist."
)
warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
case "empty": # data is None, so data must be given via x/y/z.
_data = [x, y]
if z is not None:
Expand All @@ -1818,19 +1829,19 @@ def virtualfile_in( # noqa: PLR0912
else:
# Python list, tuple, numpy.ndarray, and pandas.Series types
_data = np.atleast_2d(np.asanyarray(data).T)
case "matrix":
case "matrix" if data.dtype.kind not in "iuf":
# GMT can only accept a 2-D matrix which are signed integer (i),
# unsigned integer (u) or floating point (f) types. For other data
# types, we need to use virtualfile_from_vectors instead, which turns
# the matrix into a list of vectors and allows for better handling of
# non-integer/float type inputs (e.g. for string or datetime data types)
_data = (data,)
if data.dtype.kind not in "iuf":
_virtualfile_from = self.virtualfile_from_vectors
_data = data.T
_virtualfile_from = self.virtualfile_from_vectors
_data = data.T
case _:
pass

# Finally create the virtualfile from the data, to be passed into GMT
file_context = _virtualfile_from(*_data)
file_context = _virtualfile_from(_data)
return file_context

def virtualfile_from_data(
Expand Down
37 changes: 30 additions & 7 deletions pygmt/tests/test_clib_virtualfile_from_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_virtualfile_from_vectors(dtypes):
y = np.arange(size, size * 2, 1, dtype=dtype)
z = np.arange(size * 2, size * 3, 1, dtype=dtype)
with clib.Session() as lib:
with lib.virtualfile_from_vectors(x, y, z) as vfile:
with lib.virtualfile_from_vectors((x, y, z)) as vfile:
with GMTTempFile() as outfile:
lib.call_module("info", [vfile, f"->{outfile.name}"])
output = outfile.read(keep_tabs=True)
Expand All @@ -64,7 +64,7 @@ def test_virtualfile_from_vectors_one_string_or_object_column(dtype):
y = np.arange(size, size * 2, 1, dtype=np.int32)
strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=dtype)
with clib.Session() as lib:
with lib.virtualfile_from_vectors(x, y, strings) as vfile:
with lib.virtualfile_from_vectors((x, y, strings)) as vfile:
with GMTTempFile() as outfile:
lib.call_module("convert", [vfile, f"->{outfile.name}"])
output = outfile.read(keep_tabs=True)
Expand All @@ -86,7 +86,7 @@ def test_virtualfile_from_vectors_two_string_or_object_columns(dtype):
strings1 = np.array(["a", "bc", "def", "ghij", "klmnolooong"], dtype=dtype)
strings2 = np.array(["pqrst", "uvwx", "yz!", "@#", "$"], dtype=dtype)
with clib.Session() as lib:
with lib.virtualfile_from_vectors(x, y, strings1, strings2) as vfile:
with lib.virtualfile_from_vectors((x, y, strings1, strings2)) as vfile:
with GMTTempFile() as outfile:
lib.call_module("convert", [vfile, f"->{outfile.name}"])
output = outfile.read(keep_tabs=True)
Expand All @@ -105,7 +105,7 @@ def test_virtualfile_from_vectors_transpose(dtypes):
for dtype in dtypes:
data = np.arange(shape[0] * shape[1], dtype=dtype).reshape(shape)
with clib.Session() as lib:
with lib.virtualfile_from_vectors(*data.T) as vfile:
with lib.virtualfile_from_vectors(data.T) as vfile:
with GMTTempFile() as outfile:
lib.call_module("info", [vfile, "-C", f"->{outfile.name}"])
output = outfile.read(keep_tabs=True)
Expand All @@ -122,7 +122,7 @@ def test_virtualfile_from_vectors_diff_size():
y = np.arange(6)
with clib.Session() as lib:
with pytest.raises(GMTInvalidInput):
with lib.virtualfile_from_vectors(x, y):
with lib.virtualfile_from_vectors((x, y)):
pass


Expand All @@ -143,7 +143,7 @@ def test_virtualfile_from_vectors_pandas(dtypes_pandas):
dtype=dtype,
)
with clib.Session() as lib:
with lib.virtualfile_from_vectors(data.x, data.y, data.z) as vfile:
with lib.virtualfile_from_vectors((data.x, data.y, data.z)) as vfile:
with GMTTempFile() as outfile:
lib.call_module("info", [vfile, f"->{outfile.name}"])
output = outfile.read(keep_tabs=True)
Expand All @@ -163,10 +163,33 @@ def test_virtualfile_from_vectors_arraylike():
y = tuple(range(size, size * 2, 1))
z = range(size * 2, size * 3, 1)
with clib.Session() as lib:
with lib.virtualfile_from_vectors(x, y, z) as vfile:
with lib.virtualfile_from_vectors((x, y, z)) as vfile:
with GMTTempFile() as outfile:
lib.call_module("info", [vfile, f"->{outfile.name}"])
output = outfile.read(keep_tabs=True)
bounds = "\t".join([f"<{min(i):.0f}/{max(i):.0f}>" for i in (x, y, z)])
expected = f"<vector memory>: N = {size}\t{bounds}\n"
assert output == expected


def test_virtualfile_from_vectors_args():
"""
Test the backward compatibility of the deprecated syntax for passing multiple
vectors.

This test is the same as test_virtualfile_from_vectors_arraylike, but using the
old syntax.
"""
size = 13
x = list(range(0, size, 1))
y = tuple(range(size, size * 2, 1))
z = range(size * 2, size * 3, 1)
with pytest.warns(FutureWarning, match="virtualfile_from_vectors"):
with clib.Session() as lib:
with lib.virtualfile_from_vectors(x, y, z) as vfile:
with GMTTempFile() as outfile:
lib.call_module("info", [vfile, f"->{outfile.name}"])
output = outfile.read(keep_tabs=True)
bounds = "\t".join([f"<{min(i):.0f}/{max(i):.0f}>" for i in (x, y, z)])
expected = f"<vector memory>: N = {size}\t{bounds}\n"
assert output == expected