clib.Session.virtualfile_from_vectors: Now takes a sequence of vectors as its single argument (Passing multiple arguments will be unsupported in v0.16.0) (#3522)

seisman authored Oct 22, 2024
1 parent 0482394 commit 7eb93f2
Showing 2 changed files with 92 additions and 58 deletions.
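In short, callers should now wrap all vectors in one sequence instead of passing them as separate positional arguments. A minimal before/after sketch of the calling convention, adapted from the docstring example in this commit (the old form still works in v0.14.0 but emits a FutureWarning):

import numpy as np
from pygmt.clib import Session

x = np.array([1, 2, 3])
y = np.array([4, 5, 6])
z = np.array([7, 8, 9])

with Session() as lib:
    # New syntax: one sequence (tuple or list) holding all vectors.
    with lib.virtualfile_from_vectors((x, y, z)) as vfile:
        lib.call_module("info", [vfile])

    # Deprecated syntax, to be removed in v0.16.0:
    # with lib.virtualfile_from_vectors(x, y, z) as vfile:
    #     ...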
113 changes: 62 additions & 51 deletions pygmt/clib/session.py
@@ -1324,37 +1324,36 @@ def open_virtual_file(self, family, geometry, direction, data):
return self.open_virtualfile(family, geometry, direction, data)

@contextlib.contextmanager
- def virtualfile_from_vectors(self, *vectors):
+ def virtualfile_from_vectors(
+ self, vectors: Sequence, *args
+ ) -> Generator[str, None, None]:
"""
- Store 1-D arrays as columns of a table inside a virtual file.
+ Store a sequence of 1-D vectors as columns of a dataset inside a virtual file.
- Use the virtual file name to pass in the data in your vectors to a GMT
- module.
+ Use the virtual file name to pass the dataset with your vectors to a GMT module.
- Context manager (use in a ``with`` block). Yields the virtual file name
- that you can pass as an argument to a GMT module call. Closes the
- virtual file upon exit of the ``with`` block.
+ Context manager (use in a ``with`` block). Yields the virtual file name that you
+ can pass as an argument to a GMT module call. Closes the virtual file upon exit
+ of the ``with`` block.
- Use this instead of creating the data container and virtual file by
- hand with :meth:`pygmt.clib.Session.create_data`,
- :meth:`pygmt.clib.Session.put_vector`, and
- :meth:`pygmt.clib.Session.open_virtualfile`.
+ Use this instead of creating the data container and virtual file by hand with
+ :meth:`pygmt.clib.Session.create_data`, :meth:`pygmt.clib.Session.put_vector`,
+ and :meth:`pygmt.clib.Session.open_virtualfile`.
- If the arrays are C contiguous blocks of memory, they will be passed
- without copying to GMT. If they are not (e.g., they are columns of a
- 2-D array), they will need to be copied to a contiguous block.
+ If the arrays are C contiguous blocks of memory, they will be passed without
+ copying to GMT. If they are not (e.g., they are columns of a 2-D array), they
+ will need to be copied to a contiguous block.
Parameters
----------
- vectors : 1-D arrays
- The vectors that will be included in the array. All must be of the
+ vectors
+ A sequence of vectors that will be stored in the dataset. All must be of the
same size.
Yields
------
- fname : str
- The name of virtual file. Pass this as a file name argument to a
- GMT module.
+ fname
+ The name of virtual file. Pass this as a file name argument to a GMT module.
Examples
--------
@@ -1366,34 +1365,49 @@ def virtualfile_from_vectors(self, *vectors):
>>> y = np.array([4, 5, 6])
>>> z = pd.Series([7, 8, 9])
>>> with Session() as ses:
- ... with ses.virtualfile_from_vectors(x, y, z) as fin:
+ ... with ses.virtualfile_from_vectors((x, y, z)) as fin:
... # Send the output to a file so that we can read it
... with GMTTempFile() as fout:
... ses.call_module("info", [fin, f"->{fout.name}"])
... print(fout.read().strip())
<vector memory>: N = 3 <1/3> <4/6> <7/9>
"""
- # Conversion to a C-contiguous array needs to be done here and not in
- # put_vector or put_strings because we need to maintain a reference to
- # the copy while it is being used by the C API. Otherwise, the array
- # would be garbage collected and the memory freed. Creating it in this
- # context manager guarantees that the copy will be around until the
- # virtual file is closed. The conversion is implicit in
+ # "*args" is added in v0.14.0 for backward-compatibility with the deprecated
+ # syntax of passing multiple vectors as positional arguments.
+ # Remove it in v0.16.0.
+ if len(args) > 0:
+ msg = (
+ "Passing multiple arguments to Session.virtualfile_from_vectors is "
+ "deprecated since v0.14.0 and will be unsupported in v0.16.0. "
+ "Put all vectors in a sequence (a tuple or a list) instead and pass "
+ "the sequence as the single argument to this function. "
+ "E.g., use `with lib.virtualfile_from_vectors((x, y, z)) as vfile` "
+ "instead of `with lib.virtualfile_from_vectors(x, y, z) as vfile`."
+ )
+ warnings.warn(message=msg, category=FutureWarning, stacklevel=3)
+ vectors = (vectors, *args)
+
+ # Conversion to a C-contiguous array needs to be done here and not in put_vector
+ # or put_strings because we need to maintain a reference to the copy while it is
+ # being used by the C API. Otherwise, the array would be garbage collected and
+ # the memory freed. Creating it in this context manager guarantees that the copy
+ # will be around until the virtual file is closed. The conversion is implicit in
# vectors_to_arrays.
arrays = vectors_to_arrays(vectors)

columns = len(arrays)
- # Find arrays that are of string dtype from column 3 onwards
- # Assumes that first 2 columns contains coordinates like longitude
- # latitude, or datetime string types.
+ # Find arrays that are of string dtype from column 3 onwards. Assumes that first
+ # 2 columns contains coordinates like longitude, latitude, or datetime string
+ # types.
for col, array in enumerate(arrays[2:]):
if pd.api.types.is_string_dtype(array.dtype):
columns = col + 2
break

rows = len(arrays[0])
if not all(len(i) == rows for i in arrays):
raise GMTInvalidInput("All arrays must have same size.")
msg = "All arrays must have same size."
raise GMTInvalidInput(msg)

family = "GMT_IS_DATASET|GMT_VIA_VECTOR"
geometry = "GMT_IS_POINT"
@@ -1406,8 +1420,8 @@ def virtualfile_from_vectors(self, *vectors):
for col, array in enumerate(arrays[:columns]):
self.put_vector(dataset, column=col, vector=array)

- # Use put_strings for last column(s) with string type data
- # Have to use modifier "GMT_IS_DUPLICATE" to duplicate the strings
+ # Use put_strings for last column(s) with string type data.
+ # Have to use modifier "GMT_IS_DUPLICATE" to duplicate the strings.
string_arrays = arrays[columns:]
if string_arrays:
if len(string_arrays) == 1:
@@ -1682,7 +1696,7 @@ def virtualfile_from_stringio(
seg.header = None
seg.text = None

- def virtualfile_in(  # noqa: PLR0912
+ def virtualfile_in(
self,
check_kind=None,
data=None,
@@ -1781,19 +1795,18 @@ def virtualfile_in(  # noqa: PLR0912
"vectors": self.virtualfile_from_vectors,
}[kind]

- # Ensure the data is an iterable (Python list or tuple).
+ # "_data" is the data that will be passed to the _virtualfile_from function.
+ # "_data" defaults to "data" but should be adjusted for some cases.
+ _data = data
match kind:
case "arg" | "file" | "geojson" | "grid" | "image" | "stringio":
_data = (data,)
if kind == "image" and data.dtype != "uint8":
msg = (
f"Input image has dtype: {data.dtype} which is unsupported, "
"and may result in an incorrect output. Please recast image "
"to a uint8 dtype and/or scale to 0-255 range, e.g. "
"using a histogram equalization function like "
"skimage.exposure.equalize_hist."
)
warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
case "image" if data.dtype != "uint8":
msg = (
f"Input image has dtype: {data.dtype} which is unsupported, and "
"may result in an incorrect output. Please recast image to a uint8 "
"dtype and/or scale to 0-255 range, e.g. using a histogram "
"equalization function like skimage.exposure.equalize_hist."
)
warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
case "empty": # data is None, so data must be given via x/y/z.
_data = [x, y]
if z is not None:
Expand All @@ -1808,19 +1821,17 @@ def virtualfile_in( # noqa: PLR0912
else:
# Python list, tuple, numpy.ndarray, and pandas.Series types
_data = np.atleast_2d(np.asanyarray(data).T)
case "matrix":
case "matrix" if data.dtype.kind not in "iuf":
# GMT can only accept a 2-D matrix which are signed integer (i),
# unsigned integer (u) or floating point (f) types. For other data
# types, we need to use virtualfile_from_vectors instead, which turns
# the matrix into a list of vectors and allows for better handling of
# non-integer/float type inputs (e.g. for string or datetime data types)
- _data = (data,)
- if data.dtype.kind not in "iuf":
- _virtualfile_from = self.virtualfile_from_vectors
- _data = data.T
+ _virtualfile_from = self.virtualfile_from_vectors
+ _data = data.T

# Finally create the virtualfile from the data, to be passed into GMT
- file_context = _virtualfile_from(*_data)
+ file_context = _virtualfile_from(_data)
return file_context

def virtualfile_from_data(
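As the comments in virtualfile_from_vectors above note, columns from the third onward that have a string dtype are sent with put_strings, while the leading numeric columns go through put_vector. A small sketch of what that means for a caller, using hypothetical example values (mirroring the one-string-column test below):

import numpy as np
from pygmt.clib import Session
from pygmt.helpers import GMTTempFile

x = np.array([1.0, 2.0, 3.0])
y = np.array([4.0, 5.0, 6.0])
labels = np.array(["low", "mid", "high"])  # string dtype -> trailing text column

with Session() as lib:
    # The first two columns are treated as numeric coordinates; the string
    # column is attached to the dataset as trailing text via put_strings.
    with lib.virtualfile_from_vectors((x, y, labels)) as vfile:
        with GMTTempFile() as tmp:
            lib.call_module("convert", [vfile, f"->{tmp.name}"])
            print(tmp.read().strip())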
37 changes: 30 additions & 7 deletions pygmt/tests/test_clib_virtualfile_from_vectors.py
@@ -44,7 +44,7 @@ def test_virtualfile_from_vectors(dtypes):
y = np.arange(size, size * 2, 1, dtype=dtype)
z = np.arange(size * 2, size * 3, 1, dtype=dtype)
with clib.Session() as lib:
- with lib.virtualfile_from_vectors(x, y, z) as vfile:
+ with lib.virtualfile_from_vectors((x, y, z)) as vfile:
with GMTTempFile() as outfile:
lib.call_module("info", [vfile, f"->{outfile.name}"])
output = outfile.read(keep_tabs=True)
@@ -64,7 +64,7 @@ def test_virtualfile_from_vectors_one_string_or_object_column(dtype):
y = np.arange(size, size * 2, 1, dtype=np.int32)
strings = np.array(["a", "bc", "defg", "hijklmn", "opqrst"], dtype=dtype)
with clib.Session() as lib:
- with lib.virtualfile_from_vectors(x, y, strings) as vfile:
+ with lib.virtualfile_from_vectors((x, y, strings)) as vfile:
with GMTTempFile() as outfile:
lib.call_module("convert", [vfile, f"->{outfile.name}"])
output = outfile.read(keep_tabs=True)
@@ -86,7 +86,7 @@ def test_virtualfile_from_vectors_two_string_or_object_columns(dtype):
strings1 = np.array(["a", "bc", "def", "ghij", "klmnolooong"], dtype=dtype)
strings2 = np.array(["pqrst", "uvwx", "yz!", "@#", "$"], dtype=dtype)
with clib.Session() as lib:
- with lib.virtualfile_from_vectors(x, y, strings1, strings2) as vfile:
+ with lib.virtualfile_from_vectors((x, y, strings1, strings2)) as vfile:
with GMTTempFile() as outfile:
lib.call_module("convert", [vfile, f"->{outfile.name}"])
output = outfile.read(keep_tabs=True)
@@ -105,7 +105,7 @@ def test_virtualfile_from_vectors_transpose(dtypes):
for dtype in dtypes:
data = np.arange(shape[0] * shape[1], dtype=dtype).reshape(shape)
with clib.Session() as lib:
- with lib.virtualfile_from_vectors(*data.T) as vfile:
+ with lib.virtualfile_from_vectors(data.T) as vfile:
with GMTTempFile() as outfile:
lib.call_module("info", [vfile, "-C", f"->{outfile.name}"])
output = outfile.read(keep_tabs=True)
@@ -122,7 +122,7 @@ def test_virtualfile_from_vectors_diff_size():
y = np.arange(6)
with clib.Session() as lib:
with pytest.raises(GMTInvalidInput):
- with lib.virtualfile_from_vectors(x, y):
+ with lib.virtualfile_from_vectors((x, y)):
pass


@@ -143,7 +143,7 @@ def test_virtualfile_from_vectors_pandas(dtypes_pandas):
dtype=dtype,
)
with clib.Session() as lib:
- with lib.virtualfile_from_vectors(data.x, data.y, data.z) as vfile:
+ with lib.virtualfile_from_vectors((data.x, data.y, data.z)) as vfile:
with GMTTempFile() as outfile:
lib.call_module("info", [vfile, f"->{outfile.name}"])
output = outfile.read(keep_tabs=True)
@@ -163,10 +163,33 @@ def test_virtualfile_from_vectors_arraylike():
y = tuple(range(size, size * 2, 1))
z = range(size * 2, size * 3, 1)
with clib.Session() as lib:
- with lib.virtualfile_from_vectors(x, y, z) as vfile:
+ with lib.virtualfile_from_vectors((x, y, z)) as vfile:
with GMTTempFile() as outfile:
lib.call_module("info", [vfile, f"->{outfile.name}"])
output = outfile.read(keep_tabs=True)
bounds = "\t".join([f"<{min(i):.0f}/{max(i):.0f}>" for i in (x, y, z)])
expected = f"<vector memory>: N = {size}\t{bounds}\n"
assert output == expected


+ def test_virtualfile_from_vectors_args():
+ """
+ Test the backward compatibility of the deprecated syntax for passing multiple
+ vectors.
+
+ This test is the same as test_virtualfile_from_vectors_arraylike, but using the
+ old syntax.
+ """
+ size = 13
+ x = list(range(0, size, 1))
+ y = tuple(range(size, size * 2, 1))
+ z = range(size * 2, size * 3, 1)
+ with pytest.warns(FutureWarning, match="virtualfile_from_vectors"):
+ with clib.Session() as lib:
+ with lib.virtualfile_from_vectors(x, y, z) as vfile:
+ with GMTTempFile() as outfile:
+ lib.call_module("info", [vfile, f"->{outfile.name}"])
+ output = outfile.read(keep_tabs=True)
+ bounds = "\t".join([f"<{min(i):.0f}/{max(i):.0f}>" for i in (x, y, z)])
+ expected = f"<vector memory>: N = {size}\t{bounds}\n"
+ assert output == expected
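For downstream code that still uses the old multi-argument form, one possible way to find call sites before v0.16.0 is to promote the FutureWarning to an error during a test run; a sketch using only the standard library warnings filter:

import warnings

# Make deprecated virtualfile_from_vectors calls raise instead of warn,
# so old-style call sites fail loudly in tests.
warnings.filterwarnings(
    "error",
    message=".*virtualfile_from_vectors.*",
    category=FutureWarning,
)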
