Read Matrix Market with fast_matrix_market (python-graphblas#391)
* Read Matrix Market with `fast_matrix_market`

* Update usage of `from_dense` and `to_dense` in docs and notebooks

* Ignore warning from new version of pydata/sparse (we should investigate later)

* `to_pydata_sparse(v)` on Vector should create 1-d array

* Bump `awkward` to 2.1.1

* Maybe trust `ruff` to fix some things

But run `autoflake`, `isort`, `pyupgrade`, and `black` first (for now).

* Add a few notes to pre-commit hooks

* Add `fast-matrix-market` to optional dependencies documentation

* Drop autoflake in pre-commit (use ruff instead)

* Note that `scipy` is needed for all backends for mmread and mmwrite

* Add Matrix Market to `io` docs
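
An illustrative sketch of the I/O changes above (written for this page; the keyword
names `missing_value` and `fill_value` are assumptions, and the pydata `sparse`
package must be installed):

    import numpy as np
    import graphblas as gb

    # `to_pydata_sparse` on a Vector now yields a 1-d pydata/sparse array
    v = gb.Vector.from_dense(np.array([1.0, 0.0, 2.0]), missing_value=0.0)
    s = gb.io.to_pydata_sparse(v)        # 1-d sparse array with shape (3,)

    # round-trip through a dense array with an explicit fill value
    dense = v.to_dense(fill_value=0.0)   # -> numpy array [1., 0., 2.]
    w = gb.Vector.from_dense(dense, missing_value=0.0)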
eriknw authored Mar 22, 2023
1 parent c3baea8 commit 4920dc8
Showing 18 changed files with 227 additions and 116 deletions.
11 changes: 6 additions & 5 deletions .github/workflows/test_and_build.yml
@@ -143,26 +143,27 @@ jobs:
nxver=$(python -c 'import random ; print(random.choice(["=2.7", "=2.8", "=3.0", ""]))')
yamlver=$(python -c 'import random ; print(random.choice(["=5.4", "=6.0", ""]))')
sparsever=$(python -c 'import random ; print(random.choice(["=0.12", "=0.13", "=0.14", ""]))')
fmmver=$(python -c 'import random ; print(random.choice(["=1.4", ""]))')
if [[ ${{ steps.pyver.outputs.selected }} == "3.8" ]]; then
npver=$(python -c 'import random ; print(random.choice(["=1.21", "=1.22", "=1.23", ""]))')
spver=$(python -c 'import random ; print(random.choice(["=1.8", "=1.9", "=1.10", ""]))')
pdver=$(python -c 'import random ; print(random.choice(["=1.2", "=1.3", "=1.4", "=1.5", ""]))')
akver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", "=2.0", ""]))')
akver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", "=2.0", "=2.1", ""]))')
elif [[ ${{ steps.pyver.outputs.selected }} == "3.9" ]]; then
npver=$(python -c 'import random ; print(random.choice(["=1.21", "=1.22", "=1.23", ""]))')
spver=$(python -c 'import random ; print(random.choice(["=1.8", "=1.9", "=1.10", ""]))')
pdver=$(python -c 'import random ; print(random.choice(["=1.2", "=1.3", "=1.4", "=1.5", ""]))')
akver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", "=2.0", ""]))')
akver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", "=2.0", "=2.1", ""]))')
elif [[ ${{ steps.pyver.outputs.selected }} == "3.10" ]]; then
npver=$(python -c 'import random ; print(random.choice(["=1.21", "=1.22", "=1.23", ""]))')
spver=$(python -c 'import random ; print(random.choice(["=1.8", "=1.9", "=1.10", ""]))')
pdver=$(python -c 'import random ; print(random.choice(["=1.3", "=1.4", "=1.5", ""]))')
akver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", "=2.0", ""]))')
akver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", "=2.0", "=2.1", ""]))')
else # Python 3.11
npver=$(python -c 'import random ; print(random.choice(["=1.23", ""]))')
spver=$(python -c 'import random ; print(random.choice(["=1.9", "=1.10", ""]))')
pdver=$(python -c 'import random ; print(random.choice(["=1.5", ""]))')
akver=$(python -c 'import random ; print(random.choice(["=1.10", "=2.0.5", "=2.0.6", "=2.0.7", "=2.0.8", ""]))')
akver=$(python -c 'import random ; print(random.choice(["=1.10", "=2.0", "=2.1", ""]))')
fi
if [[ ${{ steps.sourcetype.outputs.selected }} == "source" || ${{ steps.sourcetype.outputs.selected }} == "upstream" ]]; then
# TODO: there are currently issues with some numpy versions when
@@ -188,7 +189,7 @@ jobs:
# Once we have wheels for all OSes, we can delete the last two lines.
mamba install packaging pytest coverage coveralls=3.3.1 pytest-randomly cffi donfig pyyaml${yamlver} sparse${sparsever} \
pandas${pdver} scipy${spver} numpy${npver} awkward${akver} networkx${nxver} numba${numbaver} \
pandas${pdver} scipy${spver} numpy${npver} awkward${akver} networkx${nxver} numba${numbaver} fast_matrix_market${fmmver} \
${{ matrix.slowtask == 'pytest_bizarro' && 'black' || '' }} \
${{ matrix.slowtask == 'notebooks' && 'matplotlib nbconvert jupyter "ipython>=7"' || '' }} \
${{ steps.sourcetype.outputs.selected == 'upstream' && 'cython' || '' }} \
29 changes: 20 additions & 9 deletions .pre-commit-config.yaml
@@ -24,15 +24,13 @@ repos:
hooks:
- id: validate-pyproject
name: Validate pyproject.toml
- repo: https://github.com/myint/autoflake
rev: v2.0.1
hooks:
- id: autoflake
args: [--in-place]
# We can probably remove `isort` if we come to trust `ruff --fix`,
# but we'll need to figure out the configuration to do this in `ruff`
- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
# Let's keep `pyupgrade` even though `ruff --fix` probably does most of it
- repo: https://github.com/asottile/pyupgrade
rev: v3.3.1
hooks:
@@ -48,37 +46,50 @@ repos:
hooks:
- id: black
- id: black-jupyter
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.257
hooks:
- id: ruff
args: [--fix-only]
# Let's keep `flake8` even though `ruff` does much of the same.
# `flake8-bugbear` and `flake8-simplify` have caught things missed by `ruff`.
- repo: https://github.com/PyCQA/flake8
rev: 6.0.0
hooks:
- id: flake8
additional_dependencies: &flake8_dependencies
# These versions need updated manually
- flake8==6.0.0
- flake8-comprehensions==3.10.1
- flake8-bugbear==23.2.13
- flake8-bugbear==23.3.12
- flake8-simplify==0.19.3
- repo: https://github.com/asottile/yesqa
rev: v1.4.0
hooks:
- id: yesqa
additional_dependencies: *flake8_dependencies
- repo: https://github.com/codespell-project/codespell
rev: v2.2.2
rev: v2.2.4
hooks:
- id: codespell
types_or: [python, rst, markdown]
additional_dependencies: [tomli]
files: ^(graphblas|docs)/
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.0.252
rev: v0.0.257
hooks:
- id: ruff
- repo: https://github.com/sphinx-contrib/sphinx-lint
rev: v0.6.7
hooks:
- id: sphinx-lint
args: [--enable, all, "--disable=line-too-long,leaked-markup"]
# `pyroma` may help keep our package standards up to date if best practices change.
# This is probably a "low value" check though and safe to remove if we want faster pre-commit.
- repo: https://github.com/regebro/pyroma
rev: "4.2"
hooks:
- id: pyroma
args: [-n, "10", .]
- repo: local
hooks:
# Add `--hook-stage manual` to pre-commit command to run (very slow)
3 changes: 2 additions & 1 deletion README.md
@@ -40,7 +40,8 @@ The following are not required by python-graphblas, but may be needed for certai
- `pandas` – required for nicer `__repr__`;
- `matplotlib` – required for basic plotting of graphs;
- `scipy` – used in io module to read/write `scipy.sparse` format;
- `networkx` – used in `io` module to interface with `networkx` graphs.
- `networkx` – used in `io` module to interface with `networkx` graphs;
- `fast-matrix-market` - for faster read/write of Matrix Market files with `gb.io.mmread` and `gb.io.mmwrite`.

## Description
Currently works with [SuiteSparse:GraphBLAS](https://github.com/DrTimothyAldenDavis/GraphBLAS), but the goal is to make it work with all implementations of the GraphBLAS spec.
2 changes: 2 additions & 0 deletions dev-requirements.txt
@@ -6,6 +6,7 @@ pyyaml
pandas
# For I/O
awkward
fast_matrix_market
networkx
scipy
sparse
@@ -16,6 +17,7 @@ matplotlib
# For linting
pre-commit
# For testing
packaging
pytest-cov
# For debugging
icecream
1 change: 1 addition & 0 deletions docs/getting_started/index.rst
@@ -34,6 +34,7 @@ to work.
- `matplotlib <https://matplotlib.org>`__ -- required for basic plotting of graphs
- `scipy <https://scipy.org/>`__ -- used in ``io`` module to read/write ``scipy.sparse`` format
- `networkx <https://networkx.org>`__ -- used in ``io`` module to interface with networkx graphs
- `fast-matrix-market <https://github.com/alugowski/fast_matrix_market>`__ -- for faster read/write of Matrix Market files with ``gb.io.mmread`` and ``gb.io.mmwrite``

GraphBLAS Fundamentals
----------------------
16 changes: 16 additions & 0 deletions docs/user_guide/io.rst
@@ -129,3 +129,19 @@ Note that A is unchanged in the above example.
The SuiteSparse export has a ``give_ownership`` option. This performs a zero-copy
move operation and invalidates the original python-graphblas object. When extreme speed is needed or memory is
too limited to make a copy, this option may be needed.
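
A minimal sketch of this pattern, assuming the ``suitesparse`` backend
(the exported dict is re-imported here with ``Matrix.ss.import_any``)::

    import graphblas as gb

    A = gb.Matrix.from_coo([0, 1], [1, 0], [10.0, 20.0], nrows=2, ncols=2)
    d = A.ss.export("csr", give_ownership=True)  # zero-copy move; A is invalidated
    B = gb.Matrix.ss.import_any(**d)             # rebuild a Matrix from the exported arrays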

Matrix Market files
-------------------

The `Matrix Market file format <https://math.nist.gov/MatrixMarket/formats.html>`_ is a common
format for storing sparse arrays in human-readable ASCII.
Matrix Market files--also called MM files--often use the ".mtx" file extension.
For example, many datasets in MM format can be found in `the SuiteSparse Matrix Collection <https://sparse.tamu.edu/>`_.

Use ``gb.io.mmread()`` to read a Matrix Market file to a python-graphblas Matrix,
and ``gb.io.mmwrite()`` to write a Matrix to a Matrix Market file.
These names match the equivalent functions in `scipy.sparse <https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.mmread.html>`_.

``scipy`` must be installed to read Matrix Market files.
If ``fast_matrix_market`` is installed, it will be used by default for
`much better performance <https://github.com/alugowski/fast_matrix_market>`_.
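
For example, a minimal round trip might look like the following sketch
(``example.mtx`` is a placeholder path)::

    import graphblas as gb

    A = gb.io.mmread("example.mtx")        # returns a gb.Matrix; fast_matrix_market is used when available
    print(A.nrows, A.ncols, A.nvals)
    gb.io.mmwrite("example_copy.mtx", A)   # write the Matrix back out in Matrix Market format
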
2 changes: 1 addition & 1 deletion environment.yml
@@ -23,7 +23,7 @@ dependencies:
- pandas
# For I/O
- awkward
# - fast_matrix_market # Coming soon...
- fast_matrix_market
- networkx
- scipy
- sparse
5 changes: 2 additions & 3 deletions graphblas/core/expr.py
@@ -160,9 +160,8 @@ def parse_indices(self, indices, shape):
raise TypeError(f"Index for {type(self.obj).__name__} cannot be a tuple")
# Convert to tuple for consistent processing
indices = (indices,)
else: # len(shape) == 2
if type(indices) is not tuple or len(indices) != 2:
raise TypeError(f"Index for {type(self.obj).__name__} must be a 2-tuple")
elif type(indices) is not tuple or len(indices) != 2:
raise TypeError(f"Index for {type(self.obj).__name__} must be a 2-tuple")

out = []
for i, idx in enumerate(indices):
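
For context, a small illustrative sketch of the indexing behavior this code enforces
(example objects and values are arbitrary):

    import graphblas as gb

    A = gb.Matrix.from_coo([0, 1], [1, 0], [1.0, 2.0], nrows=2, ncols=2)
    v = gb.Vector.from_coo([0, 1], [1.0, 2.0], size=2)

    A[0, 1].new()   # Matrix indexing requires a 2-tuple
    v[0].new()      # Vector indexing takes a single index
    # A[0]    -> TypeError: Index for Matrix must be a 2-tuple
    # v[0, 1] -> TypeError: Index for Vector cannot be a tuple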
11 changes: 4 additions & 7 deletions graphblas/core/matrix.py
@@ -3154,14 +3154,11 @@ def _prep_for_assign(self, resolved_indexes, value, mask, is_submask, replace, o
mask = _vanilla_subassign_mask(
self, mask, rowidx, colidx, replace, opts
)
elif backend == "suitesparse":
cfunc_name = "GxB_Matrix_subassign_Scalar"
else:
if backend == "suitesparse":
cfunc_name = "GxB_Matrix_subassign_Scalar"
else:
cfunc_name = "GrB_Matrix_assign_Scalar"
mask = _vanilla_subassign_mask(
self, mask, rowidx, colidx, replace, opts
)
cfunc_name = "GrB_Matrix_assign_Scalar"
mask = _vanilla_subassign_mask(self, mask, rowidx, colidx, replace, opts)
expr_repr = (
"[[{2._expr_name} rows], [{4._expr_name} cols]]"
f"({mask.name})"
31 changes: 11 additions & 20 deletions graphblas/core/ss/matrix.py
@@ -895,9 +895,8 @@ def _export(self, format=None, *, sort=False, give_ownership=False, raw=False, m
if is_iso:
if values.size > 1: # pragma: no branch (suitesparse)
values = values[:1]
else:
if values.size > nvals: # pragma: no branch (suitesparse)
values = values[:nvals]
elif values.size > nvals: # pragma: no branch (suitesparse)
values = values[:nvals]
# Note: nvals is also at `indptr[nrows]`
rv = {
"indptr": indptr,
@@ -937,9 +936,8 @@ def _export(self, format=None, *, sort=False, give_ownership=False, raw=False, m
if is_iso:
if values.size > 1: # pragma: no cover (suitesparse)
values = values[:1]
else:
if values.size > nvals:
values = values[:nvals]
elif values.size > nvals:
values = values[:nvals]
# Note: nvals is also at `indptr[ncols]`
rv = {
"indptr": indptr,
@@ -989,9 +987,8 @@ def _export(self, format=None, *, sort=False, give_ownership=False, raw=False, m
if is_iso:
if values.size > 1: # pragma: no cover (suitesparse)
values = values[:1]
else:
if values.size > nvals:
values = values[:nvals]
elif values.size > nvals:
values = values[:nvals]
# Note: nvals is also at `indptr[nvec]`
rv = {
"indptr": indptr,
@@ -1044,9 +1041,8 @@ def _export(self, format=None, *, sort=False, give_ownership=False, raw=False, m
if is_iso:
if values.size > 1: # pragma: no cover (suitesparse)
values = values[:1]
else:
if values.size > nvals:
values = values[:nvals]
elif values.size > nvals:
values = values[:nvals]
# Note: nvals is also at `indptr[nvec]`
rv = {
"indptr": indptr,
@@ -3480,15 +3476,10 @@ def _import_any(
format = "cooc"
else:
format = "coo"
elif isinstance(values, np.ndarray) and values.ndim == 2 and values.flags.f_contiguous:
format = "fullc"
else:
if (
isinstance(values, np.ndarray)
and values.ndim == 2
and values.flags.f_contiguous
):
format = "fullc"
else:
format = "fullr"
format = "fullr"
else:
format = format.lower()
if method == "pack":
15 changes: 6 additions & 9 deletions graphblas/core/ss/vector.py
@@ -551,9 +551,8 @@ def _export(self, format=None, *, sort=False, give_ownership=False, raw=False, m
if is_iso:
if values.size > 1: # pragma: no cover (suitesparse)
values = values[:1]
else:
if values.size > nvals:
values = values[:nvals]
elif values.size > nvals:
values = values[:nvals]
rv = {
"size": size,
"indices": indices,
@@ -589,9 +588,8 @@ def _export(self, format=None, *, sort=False, give_ownership=False, raw=False, m
if is_iso:
if values.size > 1: # pragma: no cover (suitesparse)
values = values[:1]
else:
if values.size > size: # pragma: no branch (suitesparse)
values = values[:size]
elif values.size > size: # pragma: no branch (suitesparse)
values = values[:size]
rv = {
"bitmap": bitmap,
"nvals": nvals[0],
@@ -616,9 +614,8 @@ def _export(self, format=None, *, sort=False, give_ownership=False, raw=False, m
if is_iso:
if values.size > 1:
values = values[:1]
else:
if values.size > size: # pragma: no branch (suitesparse)
values = values[:size]
elif values.size > size: # pragma: no branch (suitesparse)
values = values[:size]
rv = {}
if raw or is_iso:
rv["size"] = size
9 changes: 4 additions & 5 deletions graphblas/core/vector.py
@@ -1868,12 +1868,11 @@ def _prep_for_assign(self, resolved_indexes, value, mask, is_submask, replace, o
else:
cfunc_name = f"GrB_Vector_assign_{dtype_name}"
mask = _vanilla_subassign_mask(self, mask, idx, replace, opts)
elif backend == "suitesparse":
cfunc_name = "GxB_Vector_subassign_Scalar"
else:
if backend == "suitesparse":
cfunc_name = "GxB_Vector_subassign_Scalar"
else:
cfunc_name = "GrB_Vector_assign_Scalar"
mask = _vanilla_subassign_mask(self, mask, idx, replace, opts)
cfunc_name = "GrB_Vector_assign_Scalar"
mask = _vanilla_subassign_mask(self, mask, idx, replace, opts)
expr_repr = (
"[[{2._expr_name} elements]]"
f"({mask.name})" # fmt: skip