-
Notifications
You must be signed in to change notification settings - Fork 224
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add pygmt.read to read a dataset/grid/image into pandas.DataFrame/xarray.DataArray #3673
base: main
Are you sure you want to change the base?
Changes from 19 commits
d913c86
f456bf8
c3cbb6e
f2a4ce4
1dd97c6
7790ea3
e588008
40d12ee
fa1021d
c378225
7b749e0
8befa58
a758752
9d66cf4
a05383a
6ca4ef2
7851ced
084b87a
b21997c
a812317
1f0f158
957c7eb
6aef3ca
72afbfe
03de9b7
85c533d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -172,6 +172,7 @@ Input/output | |
:toctree: generated | ||
|
||
load_dataarray | ||
read | ||
|
||
GMT Defaults | ||
------------ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,6 +54,7 @@ | |
makecpt, | ||
nearneighbor, | ||
project, | ||
read, | ||
select, | ||
sph2grd, | ||
sphdistance, | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,118 @@ | ||||||
""" | ||||||
Read a file into an appropriate object. | ||||||
""" | ||||||
|
||||||
from collections.abc import Mapping, Sequence | ||||||
from pathlib import PurePath | ||||||
from typing import Any, Literal | ||||||
|
||||||
import pandas as pd | ||||||
import xarray as xr | ||||||
from pygmt.clib import Session | ||||||
from pygmt.helpers import build_arg_list, is_nonstr_iter | ||||||
from pygmt.src.which import which | ||||||
|
||||||
|
||||||
def read( | ||||||
file: str | PurePath, | ||||||
kind: Literal["dataset", "grid", "image"], | ||||||
region: Sequence[float] | str | None = None, | ||||||
header: int | None = None, | ||||||
column_names: pd.Index | None = None, | ||||||
dtype: type | Mapping[Any, type] | None = None, | ||||||
index_col: str | int | None = None, | ||||||
) -> pd.DataFrame | xr.DataArray: | ||||||
""" | ||||||
Read a dataset, grid, or image from a file and return the appropriate object. | ||||||
|
||||||
The returned object is a :class:`pandas.DataFrame` for datasets, and | ||||||
:class:`xarray.DataArray` for grids and images. | ||||||
|
||||||
For datasets, keyword arguments ``column_names``, ``header``, ``dtype``, and | ||||||
``index_col`` are supported. | ||||||
|
||||||
Parameters | ||||||
---------- | ||||||
file | ||||||
The file name to read. | ||||||
kind | ||||||
The kind of data to read. Valid values are ``"dataset"``, ``"grid"``, and | ||||||
``"image"``. | ||||||
region | ||||||
The region of interest. Only data within this region will be read. | ||||||
column_names | ||||||
A list of column names. | ||||||
header | ||||||
Row number containing column names. ``header=None`` means not to parse the | ||||||
column names from table header. Ignored if the row number is larger than the | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
number of headers in the table. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
dtype | ||||||
Data type. Can be a single type for all columns or a dictionary mapping | ||||||
column names to types. | ||||||
index_col | ||||||
Column to set as index. | ||||||
|
||||||
Returns | ||||||
------- | ||||||
data | ||||||
Return type depends on the ``kind`` argument: | ||||||
|
||||||
- ``"dataset"``: :class:`pandas.DataFrame` | ||||||
- ``"grid"`` or ``"image"``: :class:`xarray.DataArray` | ||||||
|
||||||
|
||||||
Examples | ||||||
-------- | ||||||
Read a dataset into a :class:`pandas.DataFrame` object: | ||||||
|
||||||
>>> from pygmt import read | ||||||
>>> df = read("@hotspots.txt", kind="dataset") | ||||||
>>> type(df) | ||||||
<class 'pandas.core.frame.DataFrame'> | ||||||
|
||||||
Read a grid into an :class:`xarray.DataArray` object: | ||||||
|
||||||
>>> dataarray = read("@earth_relief_01d", kind="grid") | ||||||
>>> type(dataarray) | ||||||
<class 'xarray.core.dataarray.DataArray'> | ||||||
""" | ||||||
if kind not in {"dataset", "grid", "image"}: | ||||||
msg = f"Invalid kind {kind}: must be one of 'dataset', 'grid', or 'image'." | ||||||
raise ValueError(msg) | ||||||
|
||||||
if kind != "dataset" and any( | ||||||
v is not None for v in [column_names, header, dtype, index_col] | ||||||
): | ||||||
msg = ( | ||||||
"Only the 'dataset' kind supports the 'column_names', 'header', " | ||||||
"'dtype', and 'index_col' arguments." | ||||||
) | ||||||
raise ValueError(msg) | ||||||
|
||||||
kwdict = { | ||||||
"R": "/".join(f"{v}" for v in region) if is_nonstr_iter(region) else region, # type: ignore[union-attr] | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This line is used here to avoid using the
|
||||||
"T": {"dataset": "d", "grid": "g", "image": "i"}[kind], | ||||||
} | ||||||
|
||||||
with Session() as lib: | ||||||
with lib.virtualfile_out(kind=kind) as voutfile: | ||||||
lib.call_module("read", args=[file, voutfile, *build_arg_list(kwdict)]) | ||||||
|
||||||
match kind: | ||||||
case "dataset": | ||||||
return lib.virtualfile_to_dataset( | ||||||
vfname=voutfile, | ||||||
column_names=column_names, | ||||||
header=header, | ||||||
dtype=dtype, | ||||||
index_col=index_col, | ||||||
) | ||||||
case "grid" | "image": | ||||||
raster = lib.virtualfile_to_raster(vfname=voutfile, kind=kind) | ||||||
Comment on lines
+102
to
+111
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Debating on whether we should have a low-level clib |
||||||
# Add "source" encoding | ||||||
source = which(fname=file) | ||||||
raster.encoding["source"] = ( | ||||||
source[0] if isinstance(source, list) else source | ||||||
) | ||||||
_ = raster.gmt # Load GMTDataArray accessor information | ||||||
return raster |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
""" | ||
Test the read function. | ||
""" | ||
|
||
import pytest | ||
from pygmt import read | ||
|
||
|
||
def test_read_invalid_kind():
    """
    Check that ``read`` rejects an unsupported ``kind`` with a ValueError.
    """
    expect_invalid_kind = pytest.raises(ValueError, match="Invalid kind")
    with expect_invalid_kind:
        read("file.cpt", kind="cpt")
|
||
|
||
def test_read_invalid_arguments():
    """
    Test that invalid arguments raise a ValueError for non-'dataset' kind.

    Each dataset-only argument (``column_names``, ``header``, ``dtype``, and
    ``index_col``) is checked on its own so that every one of them is covered.
    """
    with pytest.raises(ValueError, match="Only the 'dataset' kind supports"):
        read("file.nc", kind="grid", column_names="foo")

    with pytest.raises(ValueError, match="Only the 'dataset' kind supports"):
        read("file.nc", kind="grid", header=1)

    with pytest.raises(ValueError, match="Only the 'dataset' kind supports"):
        read("file.nc", kind="grid", dtype="float")

    with pytest.raises(ValueError, match="Only the 'dataset' kind supports"):
        read("file.nc", kind="grid", index_col=0)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `load_dataarray` function was put under the `pygmt.io` namespace. Should we consider putting `read` under `pygmt.io` too? (Thinking about whether we need a low-level `pygmt.clib.read` and a high-level `pygmt.io.read`, as mentioned in my other comment.)