diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 7d2c88cf..5109ff5c 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -65,8 +65,14 @@ jobs: done exit $exitcode - - name: Check out sample data - run: make get_sampledata + - name: Run custom extension tests + run: make test_investspec + + - name: Demo the custom extension + run: make demo_investspec + + - name: Build sphinx docs + run: make html - name: Check links # this has rarely run for hours for no apparent reason @@ -74,9 +80,6 @@ jobs: continue-on-error: true run: make linkcheck - - name: Build sphinx docs - run: make html - - name: Set up GCP SDK if: ${{ github.ref == 'refs/heads/main' }} uses: google-github-actions/setup-gcloud@master diff --git a/Makefile b/Makefile index c6932395..a57a6980 100644 --- a/Makefile +++ b/Makefile @@ -12,16 +12,18 @@ GIT_SAMPLE_DATA_REPO := https://bitbucket.org/natcap/invest-sample-data.g GIT_SAMPLE_DATA_REPO_PATH := invest-sample-data GIT_SAMPLE_DATA_REPO_REV := c8df675a2c446bf8d00ffd8f0cbab933f7d5c25a -.PHONY: help clean html changes linkcheck +.PHONY: help clean html changes linkcheck prep_sampledata test_investspec demo_investspec help: @echo "Please use \`make ' where is one of" - @echo " clean to remove the build directory" - @echo " html to make standalone HTML files" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " linkcheck to check all external links for integrity" - @echo " get_sampledata to check out the invest-sample-data repo" - @echo " prep_sampledata to create modified tables in invest-sample-data that display nicely" + @echo " clean to remove the build directory" + @echo " html to make standalone HTML files" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " $(GIT_SAMPLE_DATA_REPO_PATH) to check out the invest-sample-data repo" + @echo " 
prep_sampledata to create modified tables in invest-sample-data that display nicely" + @echo " test_investspec to run unit tests for the custom Sphinx extension" + @echo " demo_investspec to run a demo using the custom Sphinx extension" clean: -rm -rf $(BUILDDIR)/* @@ -46,14 +48,29 @@ linkcheck: $(SOURCEDIR) @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." -get_sampledata: - -git clone $(GIT_SAMPLE_DATA_REPO) $(GIT_SAMPLE_DATA_REPO_PATH) - git -C $(GIT_SAMPLE_DATA_REPO_PATH) fetch - git -C $(GIT_SAMPLE_DATA_REPO_PATH) lfs install - git -C $(GIT_SAMPLE_DATA_REPO_PATH) lfs fetch - git -C $(GIT_SAMPLE_DATA_REPO_PATH) checkout $(GIT_SAMPLE_DATA_REPO_REV) +test_investspec: + cd extensions/investspec && python -m unittest test.test_investspec -prep_sampledata: +CUSTOM_EXTENSION_TEST_DIR = extensions/investspec/test +demo_investspec: + # remove any old build files + rm -rf $(CUSTOM_EXTENSION_TEST_DIR)/build + # install the mock module `test_module` + pip install $(CUSTOM_EXTENSION_TEST_DIR)/test_module + # -W: fail on warning + # -a: write all files, not just new or changed files + sphinx-build -W -a -b html $(CUSTOM_EXTENSION_TEST_DIR) $(CUSTOM_EXTENSION_TEST_DIR)/build + +# initialize the sample data repo and check out the commit +$(GIT_SAMPLE_DATA_REPO_PATH): + mkdir $(GIT_SAMPLE_DATA_REPO_PATH) && cd $(GIT_SAMPLE_DATA_REPO_PATH) + git -C $(GIT_SAMPLE_DATA_REPO_PATH) init + git -C $(GIT_SAMPLE_DATA_REPO_PATH) remote add origin $(GIT_SAMPLE_DATA_REPO) + git -C $(GIT_SAMPLE_DATA_REPO_PATH) fetch --depth 1 origin $(GIT_SAMPLE_DATA_REPO_REV) + # GIT_LFS_SKIP_SMUDGE=1 prevents getting all the lfs files, we only need the CSVs + GIT_LFS_SKIP_SMUDGE=1 git -C $(GIT_SAMPLE_DATA_REPO_PATH) checkout $(GIT_SAMPLE_DATA_REPO_REV) + +prep_sampledata: $(GIT_SAMPLE_DATA_REPO_PATH) # take selections of tables that are too long to display in full head -n1 invest-sample-data/pollination/landcover_biophysical_table.csv > 
invest-sample-data/pollination/landcover_biophysical_table_modified.csv tail -n3 invest-sample-data/pollination/landcover_biophysical_table.csv >> invest-sample-data/pollination/landcover_biophysical_table_modified.csv diff --git a/extensions/investspec/README.md b/extensions/investspec/README.md new file mode 100644 index 00000000..03ba4675 --- /dev/null +++ b/extensions/investspec/README.md @@ -0,0 +1,80 @@ +# investspec extension for Sphinx + +This is a custom Sphinx extension that generates documentation of InVEST model inputs from the model's `ARGS_SPEC`. +Its purpose is to help us reduce duplicated information and provide consistent, user-friendly documentation. +The `investspec` extension provides the `:investspec:` role, which can be used inline in RST files to insert generated documentation anywhere you want. + +## Setup + +`investspec` is already set up in the `conf.py` in this repo. + +In case we need to use it anywhere else, here's how to set it up: +In the `conf.py` file for the source RST, add the `investspec/` root directory to `sys.path` so that Sphinx can find it: +``` +sys.path.append(os.path.abspath('../extensions/investspec')) # or other path as appropriate +``` +and add `investspec` to the list of extensions: +``` +extensions = ['investspec'] +``` +to avoid writing out `natcap.invest. ...` before the module name every time, set the module prefix: +``` +investspec_module_prefix = 'natcap.invest' +``` + +## Usage + +The `investspec` role takes two arguments: `` :investspec:`module key` `` + +`module` (or `f'{investspec_module_prefix}.{module}'` if `investspec_module_prefix` is defined) must be an importable python module. It must have an attribute `ARGS_SPEC` that is a well-formed InVEST args spec dictionary. + +The second argument specifies which (nested) arg to document. It is a period-separated series of dictionary keys accessed starting at `ARGS_SPEC.args`. 
For example: +``` +ARGS_SPEC = { + "model_name": "InVEST Model", + "args": { + "biophysical_table_path": { + "type": "csv", + "name": "Biophysical Table" + "columns": { + "lucode": {"type": "integer"}, + "path": { + "type": "vector", + "fields": { + "value": { + "type": "integer" + } + } + } + } + +... +} +``` +If this model is located at `natcap.invest.model_name`, then you can auto-document: + +- `` :investspec:`model_name biophysical_table_path` `` + +- `` :investspec:`model_name biophysical_table_path.columns.path` `` + +- `` :investspec:`model_name biophysical_table_path.columns.path.fields.value` `` + +You can document any arg in the `ARGS_SPEC.args` dictionary this way. This includes any nested dictionary with a `type` attribute: + +- top-level args +- any row or column within a csv's `"rows"` or `"columns"` dict +- any field within a vector's `"fields"` dict +- any file or directory within a directory's `"contents"` dict + +## What is not documented +- `expression`s for `number` types. This can be any python expression, so it may be too complicated to to auto-format into human readable text. Any limits on a `number`'s value should also be described in the `about` text. +- Conditional requirements (`"required": `). This can be any python expression, so it may be too complicated to auto-format into human readable text. For any conditionally-required input, the conditions upon which it is required should also be described in the `about` text. + +## Limitations +- This implementation can only generate output that uses standard docutils features, and no sphinx-specific features. See natcap/invest.users-guide#35 for details. +- Relies on the `ARGS_SPEC` being complete. For example, columns in a table's `columns` attribute should either all have an `about` attribute, or none have an `about` attribute. However, it is still valid for only some to have an `about` attribute. If some are missing, it will work, but the generated docs will look a little strange. 
+ +## Tests +From the top level of this repo, you can run `make test_investspec` to run a set of unit tests (`extensions/investspec/test/test_investspec.py`). These only test the formatting logic. + +`make demo_investspec` exists as a sort-of integration test to prove that the extension works without errors. The output is not checked for correctness. It installs the mock module in `extensions/investspec/test/test_module`, then builds HTML docs from `extensions/investspec/test/index.rst`, using the `investspec` role. You can look at the output in `extensions/investspec/test/build` for examples of what the role does. diff --git a/extensions/investspec/investspec.py b/extensions/investspec/investspec.py new file mode 100644 index 00000000..9ac99e50 --- /dev/null +++ b/extensions/investspec/investspec.py @@ -0,0 +1,379 @@ +import docutils +import importlib + +import pint +from natcap.invest import spec_utils + +INPUT_TYPES_HTML_FILE = 'input_types.html' +# accepted geometries for a vector will be displayed in this order +GEOMETRY_ORDER = [ + 'POINT', + 'MULTIPOINT', + 'LINESTRING', + 'MULTILINESTRING', + 'POLYGON', + 'MULTIPOLYGON'] + + +def format_type_string(arg_type): + """Represent an arg type as a user-friendly string. + + Args: + arg_type (str|set(str)): the type to format. May be a single type or a + set of types. + + Returns: + formatted string that links to a description of the input type(s) + """ + def format_single_type(arg_type): + """Represent a type as a link to the corresponding Input Types section. + + Args: + arg_type (str): the type to format. + + Returns: + formatted string that links to a description of the input type + """ + # Represent the type as a string. Some need a more user-friendly name. + # we can only use standard docutils features here, so no :ref: + # this syntax works to link to a section in a different page, but it + # isn't universally supported and depends on knowing the built page name. 
+ if arg_type == 'freestyle_string': + return f'`text <{INPUT_TYPES_HTML_FILE}#text>`__' + elif arg_type == 'option_string': + return f'`option <{INPUT_TYPES_HTML_FILE}#option>`__' + elif arg_type == 'boolean': + return f'`true/false <{INPUT_TYPES_HTML_FILE}#truefalse>`__' + elif arg_type == 'csv': + return f'`CSV <{INPUT_TYPES_HTML_FILE}#csv>`__' + else: + return f'`{arg_type} <{INPUT_TYPES_HTML_FILE}#{arg_type}>`__' + if isinstance(arg_type, set): + return ' or '.join(format_single_type(t) for t in sorted(arg_type)) + else: + return format_single_type(arg_type) + + +def format_required_string(required): + """Represent an arg's required status as a user-friendly string. + + Args: + required (bool | str | None): required property of an arg. May be + `True`, `False`, `None`, or a conditional string. + + Returns: + string + """ + if required is None or required is True: + return 'required' + elif required is False: + return 'optional' + else: + # assume that the about text will describe the conditional + return 'conditionally required' + + +def format_geometries_string(geometries): + """Represent a set of allowed vector geometries as user-friendly text. + + Args: + geometries (set(str)): set of geometry names + + Returns: + string + """ + # sort the geometries so they always display in a consistent order + sorted_geoms = sorted( + geometries, + key=lambda g: GEOMETRY_ORDER.index(g)) + return '/'.join(geom.lower() for geom in sorted_geoms) + + +def format_permissions_string(permissions): + """Represent a rwx-style permissions string as user-friendly text. 
+ + Args: + permissions (str): rwx-style permissions string + + Returns: + string + """ + permissions_strings = [] + if 'r' in permissions: + permissions_strings.append('read') + if 'w' in permissions: + permissions_strings.append('write') + if 'x' in permissions: + permissions_strings.append('execute') + return ', '.join(permissions_strings) + + +def format_options_string_from_dict(options): + """Represent a dictionary of option: description pairs as a bulleted list. + + Args: + options (dict): the dictionary of options to document, where keys are + options and values are descriptions of the options + + Returns: + list of RST-formatted strings, where each is a line in a bullet list + """ + lines = [] + # casefold() is a more aggressive version of lower() that may work better + # for some languages to remove all case distinctions + sorted_options = sorted( + list(options.keys()), + key=lambda option: option.casefold() + ) + for option in sorted_options: + lines.append(f'- {option}: {options[option]}') + return lines + + +def format_options_string_from_list(options): + """Represent options as a comma-separated list. + + Args: + options (list[str]): the set of options to document + + Returns: + string of comma-separated options + """ + return ', '.join(options) + + +def capitalize(title): + """Capitalize a string into title case. + + Args: + title (str): string to capitalize + + Returns: + capitalized string (each word capitalized except linking words) + """ + + def capitalize_word(word): + """Capitalize a word, if appropriate.""" + if word in {'of', 'the'}: + return word + else: + return word[0].upper() + word[1:] + + title = ' '.join([capitalize_word(word) for word in title.split(' ')]) + title = '/'.join([capitalize_word(word) for word in title.split('/')]) + return title + + +def format_arg(name, spec): + """Format an arg spec into user-friendly documentation. 
+ + This is used for documenting: + - a single top-level arg + - a row or column in a CSV + - a field in a vector + - an item in a directory + + Args: + name (str): Name to give the section. For top-level args this is + arg['name']. For nested args it's typically their key in the + dictionary one level up. + spec (dict): A arg spec dictionary that conforms to the InVEST args + spec specification. It must at least have the key `'type'`, and + whatever other keys are expected for that type. + Returns: + list of strings, where each string is a line of RST-formatted text. + The first line has the arg name, type, required state, description, + and units if applicable. Depending on the type, there may be additional + lines that are indented, that describe details of the arg such as + vector fields and geometries, option_string options, etc. + """ + type_string = format_type_string(spec['type']) + in_parentheses = [type_string] + + # For numbers and rasters that have units, display the units + units = None + if spec['type'] == 'number': + units = spec['units'] + elif spec['type'] == 'raster' and spec['bands'][1]['type'] == 'number': + units = spec['bands'][1]['units'] + if units: + units_string = spec_utils.format_unit(units) + if units_string and units_string != 'none': + in_parentheses.append(f'units: **{units_string}**') + + if spec['type'] == 'vector': + in_parentheses.append(format_geometries_string(spec["geometries"])) + + # Represent the required state as a string, defaulting to required + # It doesn't make sense to include this for boolean checkboxes + if spec['type'] != 'boolean': + # get() returns None if the key doesn't exist in the dictionary + required_string = format_required_string(spec.get('required')) + in_parentheses.append(f'*{required_string}*') + + # Nested args may not have an about section + if 'about' in spec: + about_string = f': {spec["about"]}' + else: + about_string = '' + + first_line = f"**{name}** ({', '.join(in_parentheses)}){about_string}" 
+ + # Add details for the types that have them + indented_block = [] + if spec['type'] == 'option_string': + # may be either a dict or set. if it's empty, the options are + # dynamically generated. don't try to document them. + if spec['options']: + if isinstance(spec['options'], dict): + indented_block.append('Options:') + indented_block += format_options_string_from_dict(spec['options']) + else: + formatted_options = format_options_string_from_list(spec['options']) + indented_block.append(f'Options: {formatted_options}') + + elif spec['type'] == 'csv': + if 'columns' in spec: + header_name = 'columns' + elif 'rows' in spec: + header_name = 'rows' + else: + header_name = None + + if header_name is None: + first_line += ( + ' Please see the sample data table for details on the format.') + + # prepend the indent to each line in the indented block + return [first_line] + ['\t' + line for line in indented_block] + + +def parse_rst(text): + """Parse RST text into a list of docutils nodes. + + Args: + text (str): RST-formatted text to parse. 
May only use standard + docutils features (no Sphinx roles etc) + + Returns: + list[docutils.Node] + """ + doc = docutils.utils.new_document( + '', + settings=docutils.frontend.OptionParser( + components=(docutils.parsers.rst.Parser,) + ).get_default_values()) + parser = docutils.parsers.rst.Parser() + parser.parse(text, doc) + + # Skip the all-encompassing document node + first_node = doc.next_node() + number_of_top_level_nodes = len( + first_node.traverse(descend=False, siblings=True)) + # if the content is wrapped in a paragraph node, + # skip it so it can display in-line + if (isinstance(first_node, docutils.nodes.paragraph) and + number_of_top_level_nodes == 1): + first_node = first_node.next_node() + + # This is a list of the node and its siblings + return list(first_node.traverse(descend=False, siblings=True)) + + +def invest_spec(name, rawtext, text, lineno, inliner, options={}, content=[]): + """Custom docutils role to generate InVEST model input docs from spec. + + Docutils expects a function that accepts all of these args. + + Args: + name (str): the local name of the interpreted text role, the role name + actually used in the document. + rawtext (str): a string containing the entire interpreted text + construct. Return it as a ``problematic`` node linked to a system + message if there is a problem. + text (str): the interpreted text content, with backslash escapes + converted to nulls (``\x00``). + lineno (int): the line number where the interpreted text begins. + inliner (Inliner): the Inliner object that called the role function. + It defines the following useful attributes: ``reporter``, + ``problematic``, ``memo``, ``parent``, ``document``. + options (dict): A dictionary of directive options for customization, to + be interpreted by the role function. Used for additional + attributes for the generated elements and other functionality. + content (list[str]): the directive content for customization + ("role" directive). 
To be interpreted by the role function. + + Interpreted role functions return a tuple of two values: + + Returns: + a tuple of two values: + - A list of nodes which will be inserted into the document tree at + the point where the interpreted role was encountered + - A list of system messages, which will be inserted into the + document tree immediately after the end of the current + inline block. + """ + # expect one or two space-separated arguments + # the first argument is a module name to import (that has an ARGS_SPEC) + # the second argument is a period-separated series of dictionary keys + # that says what layer in the nested ARGS_SPEC dictionary to document + arguments = text.split(' ', maxsplit=1) + # access the `investspec_module_prefix` config setting from conf.py + prefix = inliner.document.settings.env.app.config.investspec_module_prefix + if prefix: + module_name = f'{prefix}.{arguments[0]}' + else: + module_name = arguments[0] + # import the specified module (that should have an ARGS_SPEC attribute) + try: + module = importlib.import_module(module_name) + except ImportError: + raise ValueError( + f'Could not import the module "{module_name}" (line {lineno})') + + # document the (nested) arg at the given location + # Get the key:value pair at the specified location in the module's spec + value = module.ARGS_SPEC['args'] + keys = arguments[1].split('.') # period-separated series of keys + for i, key in enumerate(keys): + # convert raster band numbers to ints + if keys[i - 1] == 'bands': + key = int(key) + try: + value = value[key] + except KeyError: + keys_so_far = '.'.join(keys[:i + 1]) + raise ValueError( + f"Could not find the key '{keys_so_far}' in the " + f"{module_name} model's ARGS_SPEC (line {lineno})") + + # format that spec into an RST formatted description string + if isinstance(value, dict): + arg_name = capitalize(value['name']) if 'name' in value else key + rst = '\n\n'.join(format_arg(arg_name, value)) + elif isinstance(value, pint.Unit): + 
rst = spec_utils.format_unit(value) + else: + rst = str(value) + + return parse_rst(rst), [] + + +def setup(app): + """Add the custom extension to Sphinx. + + Sphinx calls this when it runs conf.py which contains + `extensions = ['investspec']` + + Args: + app (sphinx.application.Sphinx) + + Returns: + empty dictionary + """ + # tell sphinx to get a config value called investspec_module_prefix from + # conf.py. it defaults to an empty string. + # its value will be accessible later in the invest_spec function. + app.add_config_value('investspec_module_prefix', '', 'html') + app.add_role("investspec", invest_spec) + return {} diff --git a/extensions/investspec/test/__init__.py b/extensions/investspec/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/extensions/investspec/test/conf.py b/extensions/investspec/test/conf.py new file mode 100644 index 00000000..37d251ae --- /dev/null +++ b/extensions/investspec/test/conf.py @@ -0,0 +1,7 @@ +import sys +import os + + +sys.path.append(os.path.abspath('..')) +extensions = ['investspec'] +investspec_module_prefix = 'test_module' diff --git a/extensions/investspec/test/index.rst b/extensions/investspec/test/index.rst new file mode 100644 index 00000000..8984308c --- /dev/null +++ b/extensions/investspec/test/index.rst @@ -0,0 +1,43 @@ +custom sphinx role demo +*********************** + + +Here are examples of all the types: +----------------------------------- + +- :investspec:`test_module number_input` +- :investspec:`test_module ratio_input` +- :investspec:`test_module percent_input` +- :investspec:`test_module integer_input` +- :investspec:`test_module boolean_input` +- :investspec:`test_module freestyle_string_input` +- :investspec:`test_module option_string_input` +- :investspec:`test_module raster_input` +- :investspec:`test_module another_raster_input` +- :investspec:`test_module vector_input` +- :investspec:`test_module csv_input` +- :investspec:`test_module directory_input` + + +You can 
access any attribute or nested arg by a period-separated series of keys. Here is a nested CSV in the directory: +----------------------------------------------------------------------------------------------------------------------- + +:investspec:`test_module directory_input.contents.baz` + + +Here is a raster column in the CSV: +----------------------------------- + +:investspec:`test_module directory_input.contents.baz.columns.raster_path` + + +Here is the raster units: +------------------------- + +:investspec:`test_module directory_input.contents.baz.columns.raster_path.bands.1.units` + + +Outputs can be used in-line too: +-------------------------------------------- + +:investspec:`test_module directory_input.name` is the name of this arg. diff --git a/extensions/investspec/test/test_investspec.py b/extensions/investspec/test/test_investspec.py new file mode 100644 index 00000000..d834ce6a --- /dev/null +++ b/extensions/investspec/test/test_investspec.py @@ -0,0 +1,243 @@ +import unittest + +import pint +import investspec + +ureg = pint.UnitRegistry() +ureg.define('none = []') + + +class TestInvestSpec(unittest.TestCase): + + def test_number_spec(self): + spec = { + "name": "Bar", + "about": "Description", + "type": "number", + "units": ureg.meter**3/ureg.month, + "expression": "value >= 0" + } + out = investspec.format_arg(spec['name'], spec) + expected_rst = ([ + '**Bar** (`number `__, ' + 'units: **m³/month**, *required*): Description']) + self.assertEqual(repr(out), repr(expected_rst)) + + def test_ratio_spec(self): + spec = { + "name": "Bar", + "about": "Description", + "type": "ratio" + } + out = investspec.format_arg(spec['name'], spec) + expected_rst = (['**Bar** (`ratio `__, ' + '*required*): Description']) + self.assertEqual(repr(out), repr(expected_rst)) + + def test_percent_spec(self): + spec = { + "name": "Bar", + "about": "Description", + "type": "percent", + "required": False + } + out = investspec.format_arg(spec['name'], spec) + expected_rst 
= (['**Bar** (`percent `__, ' + '*optional*): Description']) + self.assertEqual(repr(out), repr(expected_rst)) + + def test_code_spec(self): + spec = { + "name": "Bar", + "about": "Description", + "type": "integer", + "required": True + } + out = investspec.format_arg(spec['name'], spec) + expected_rst = (['**Bar** (`integer `__, ' + '*required*): Description']) + self.assertEqual(repr(out), repr(expected_rst)) + + def test_boolean_spec(self): + spec = { + "name": "Bar", + "about": "Description", + "type": "boolean" + } + out = investspec.format_arg(spec['name'], spec) + expected_rst = (['**Bar** (`true/false ' + '`__): Description']) + self.assertEqual(repr(out), repr(expected_rst)) + + def test_freestyle_string_spec(self): + spec = { + "name": "Bar", + "about": "Description", + "type": "freestyle_string" + } + out = investspec.format_arg(spec['name'], spec) + expected_rst = (['**Bar** (`text `__, ' + '*required*): Description']) + self.assertEqual(repr(out), repr(expected_rst)) + + def test_option_string_spec_dictionary(self): + spec = { + "name": "Bar", + "about": "Description", + "type": "option_string", + "options": { + "option_a": "do something", + "Option_b": "do something else" + } + } + # expect that option case is ignored + # otherwise, Option_b would sort before option_a + out = investspec.format_arg(spec['name'], spec) + expected_rst = ([ + '**Bar** (`option `__, *required*): Description', + '\tOptions:', + '\t- option_a: do something', + '\t- Option_b: do something else' + ]) + self.assertEqual(repr(out), repr(expected_rst)) + + def test_option_string_spec_list(self): + spec = { + "name": "Bar", + "about": "Description", + "type": "option_string", + "options": ["option_a", "Option_b"] + } + out = investspec.format_arg(spec['name'], spec) + expected_rst = ([ + '**Bar** (`option `__, *required*): Description', + '\tOptions: option_a, Option_b' + ]) + self.assertEqual(repr(out), repr(expected_rst)) + + def test_raster_spec(self): + spec = { + "type": 
"raster", + "bands": {1: {"type": "integer"}}, + "about": "Description", + "name": "Bar" + } + out = investspec.format_arg(spec['name'], spec) + expected_rst = ([ + '**Bar** (`raster `__, *required*): Description' + ]) + self.assertEqual(repr(out), repr(expected_rst)) + + spec = { + "type": "raster", + "bands": {1: { + "type": "number", + "units": ureg.millimeter/ureg.year + }}, + "about": "Description", + "name": "Bar" + } + out = investspec.format_arg(spec['name'], spec) + expected_rst = ([ + '**Bar** (`raster `__, units: **mm/year**, *required*): Description' + ]) + self.assertEqual(repr(out), repr(expected_rst)) + + def test_vector_spec(self): + spec = { + "type": "vector", + "fields": {}, + "geometries": {"LINESTRING"}, + "about": "Description", + "name": "Bar" + } + out = investspec.format_arg(spec['name'], spec) + expected_rst = ([ + '**Bar** (`vector `__, linestring, *required*): Description' + ]) + self.assertEqual(repr(out), repr(expected_rst)) + + spec = { + "type": "vector", + "fields": { + "id": { + "type": "integer", + "about": "Unique identifier for each feature" + }, + "precipitation": { + "type": "number", + "units": ureg.millimeter/ureg.year, + "about": "Average annual precipitation over the area" + } + }, + "geometries": {"POLYGON", "MULTIPOLYGON"}, + "about": "Description", + "name": "Bar" + } + out = investspec.format_arg(spec['name'], spec) + expected_rst = ([ + '**Bar** (`vector `__, polygon/multipolygon, *required*): Description', + ]) + self.assertEqual(repr(out), repr(expected_rst)) + + def test_csv_spec(self): + spec = { + "type": "csv", + "about": "Description.", + "name": "Bar" + } + out = investspec.format_arg(spec['name'], spec) + expected_rst = ([ + '**Bar** (`CSV `__, *required*): Description. ' + 'Please see the sample data table for details on the format.' 
+ ]) + self.assertEqual(repr(out), repr(expected_rst)) + + # Test every type that can be nested in a CSV column: + # number, ratio, percent, code, + spec = { + "type": "csv", + "about": "Description", + "name": "Bar", + "columns": { + "b": {"type": "ratio", "about": "description"} + } + } + out = investspec.format_arg(spec['name'], spec) + expected_rst = ([ + '**Bar** (`CSV `__, *required*): Description' + ]) + self.assertEqual(repr(out), repr(expected_rst)) + + def test_directory_spec(self): + self.maxDiff = None + spec = { + "type": "directory", + "about": "Description", + "name": "Bar", + "contents": {} + } + out = investspec.format_arg(spec['name'], spec) + expected_rst = ([ + '**Bar** (`directory `__, *required*): Description' + ]) + self.assertEqual(repr(out), repr(expected_rst)) + + def test_multi_type_spec(self): + spec = { + "type": {"raster", "vector"}, + "about": "Description", + "name": "Bar", + "bands": {1: {"type": "integer"}}, + "geometries": {"POLYGON"}, + "fields": {} + } + out = investspec.format_arg(spec['name'], spec) + expected_rst = ([ + '**Bar** (`raster `__ or `vector `__, *required*): Description' + ]) + self.assertEqual(repr(out), repr(expected_rst)) + + +if __name__ == '__main__': + unittest.main() diff --git a/extensions/investspec/test/test_module/setup.py b/extensions/investspec/test/test_module/setup.py new file mode 100644 index 00000000..2cf59839 --- /dev/null +++ b/extensions/investspec/test/test_module/setup.py @@ -0,0 +1,7 @@ +from setuptools import setup + +setup( + name='test_module', + version='0.1', + packages=['test_module'], + zip_safe=False) diff --git a/extensions/investspec/test/test_module/test_module/test_module.py b/extensions/investspec/test/test_module/test_module/test_module.py new file mode 100644 index 00000000..502a8816 --- /dev/null +++ b/extensions/investspec/test/test_module/test_module/test_module.py @@ -0,0 +1,123 @@ +import pint + +ureg = pint.UnitRegistry() +ureg.define('none = []') + +ARGS_SPEC = { + 
"model_name": "Forest Carbon Edge Effect Model", + "module": __name__, + "userguide_html": "carbon_edge.html", + "args_with_spatial_overlap": { + "spatial_keys": ["aoi_vector_path", "lulc_raster_path"], + }, + "args": { + "number_input": { + "name": "Foo", + "about": "Numbers have units that are displayed in a human-readable way.", + "type": "number", + "units": ureg.meter**3/ureg.month, + "expression": "value >= 0" + }, + "ratio_input": { + "name": "Bar", + "about": "Here's a ratio.", + "type": "ratio" + }, + "percent_input": { + "name": "Baz", + "about": "Here's a percent.", + "type": "percent", + "required": False + }, + "integer_input": { + "name": "Abc", + "about": "Here's an integer.", + "type": "integer", + "required": True + }, + "boolean_input": { + "name": "Defg", + "about": "Here's a boolean.", + "type": "boolean" + }, + "freestyle_string_input": { + "name": "Hijk", + "about": ( + "Here's a freestyle string. If its spec has a `regexp` " + "attribute, we don't display that. The `about` attribute " + "should describe any required pattern in a user-friendly way." 
+ ), + "type": "freestyle_string" + }, + "option_string_input": { + "name": "Lmn", + "about": ( + "For option_strings, we display the options in a bullet list."), + "type": "option_string", + "options": { + "option_a": "do something", + "option_b": "do something else" + } + }, + "raster_input": { + "type": "raster", + "bands": {1: {"type": "code"}}, + "about": "Rasters are pretty simple.", + "name": "Opq" + }, + "another_raster_input": { + "type": "raster", + "bands": {1: { + "type": "number", + "units": ureg.millimeter/ureg.year + }}, + "about": ( + "If the raster's band is a `number` type, display its units"), + "name": "Rst" + }, + "vector_input": { + "type": "vector", + "fields": {}, + "geometries": {"POLYGON", "MULTIPOLYGON"}, + "about": "Display vector geometries in an ordered list.", + "name": "Uvw" + }, + "csv_input": { + "type": "csv", + "about": "Unicode characters work too 😎", + "name": "☺", + "columns": { + "a": { + "type": "number", + "units": ureg.second, + "about": "Here's a description." + } + } + }, + "directory_input": { + "type": "directory", + "about": "Here's a directory", + "name": "Foo", + "contents": { + "baz": { + "type": "csv", + "required": False, + "columns": { + "id": {"type": "code"}, + "description": { + "type": "freestyle_string", + "required": False, + "about": "a description of the id" + }, + "raster_path": { + "type": "raster", + "bands": { + 1: {"type": "number", "units": ureg.meter} + } + } + } + } + } + } + } +} diff --git a/requirements.txt b/requirements.txt index 833c5c1c..aa31e190 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ -natcap.invest git-lfs +git+https://github.com/emlys/invest.git@task/invest.users-guide/30 +pint setuptools_scm sphinx diff --git a/source/conf.py b/source/conf.py index 9eccd3f5..2e4ff534 100644 --- a/source/conf.py +++ b/source/conf.py @@ -1,28 +1,29 @@ -import sys, os +import os +import sys import subprocess import setuptools_scm # Just fail the process if this can't be found. 
-# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.append(os.path.abspath('.')) +# add to the path so that sphinx can find our custom extension +sys.path.append(os.path.abspath('../extensions/investspec')) + +# this is for the ReadTheDocs build, where conf.py is the only place we can +# run arbitrary commands such as checking out the sample data +subprocess.run(['make', '-C', '..', 'prep_sampledata']) # -- General configuration ----------------------------------------------------- -if not os.path.exists('../invest-sample-data'): - subprocess.run(['make', '-C', '..', 'get_sampledata']) -if not os.path.exists('invest-sample-data/pollination/landcover_biophysical_table_modified.csv'): - subprocess.run(['make', '-C', '..', 'prep_sampledata']) # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.mathjax'] +extensions = ['sphinx.ext.mathjax', 'investspec'] + +# config value for the investspec custom extension +# this is prefixed onto the :investspec: role's `module` argument before importing +# this way, we don't have to write 'natcap.invest' every time +investspec_module_prefix = 'natcap.invest' # Enable figure number referencing with the :numref: syntax numfig = True -# Add any paths that contain templates here, relative to this directory. -templates_path = ['.templates'] - gettext_compact = False # The suffix of source filenames. @@ -41,7 +42,8 @@ user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:88.0) Gecko/20100101 Firefox/88.0' # this link has been unreliable but still seems to exist as of 3/2021. 
revisit in the future -linkcheck_ignore = ['http://trapdoor.bren.ucsb.edu/research/2014Group_Projects/documents/BermudaWind_Final_Report_2014-05-07.pdf'] +linkcheck_ignore = [ + 'http://trapdoor.bren.ucsb.edu/research/2014Group_Projects/documents/BermudaWind_Final_Report_2014-05-07.pdf'] # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -72,7 +74,7 @@ version = version.replace('+', '+ug.') # The full version, including alpha/beta/rc tags. -print (f'Version: {version}') +print(f'Version: {version}') # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' diff --git a/source/getting_started.rst b/source/getting_started.rst index 99ade8de..87ef88cf 100644 --- a/source/getting_started.rst +++ b/source/getting_started.rst @@ -87,18 +87,18 @@ This high-level tutorial gives you an idea of the main activities involved with - See the :ref:`using-sample-data` section of this chapter for more information. - Time required to try out a model with sample data: Low 4. **Create your own data for the baseline case** - - Gather the spatial and non-spatial model inputs required by the model. - - Process the collected data for your area of interest, so that each input meets InVEST requirements. + - Gather the spatial and non-spatial model inputs required by the model. + - Process the collected data for your area of interest, so that each input meets InVEST requirements. - Most of this processing will be done using GIS software. - Many models also require an extensive literature search for certain parameters. - Consult the model's User Guide chapter and sample data for requirements and examples of what the datasets should look like. - Also consult the :ref:`formatting-data` section of this chapter for general tips. - - The amount of processing will vary greatly, depending on the particular model input and format of the original source. 
+ - The amount of processing will vary greatly, depending on the particular model input and format of the original source. - It’s best to look for more local sources of data, but if these aren’t available, coarser, global layers and values are generally available. The User Guide Appendix for each model provides pointers to some global data sources. - Time required to create data for one model: High. 5. **Create future scenarios** - Analyzing scenarios is optional, but commonly done. - - Scenarios are often based on altering land use/land cover, habitat, or land management maps to reflect the impacts of a proposed intervention, or climate change. + - Scenarios are often based on altering land use/land cover, habitat, or land management maps to reflect the impacts of a proposed intervention, or climate change. - Creating scenarios may be very time-consuming if, for example, a stakeholder process is used, or climate modeling is required. - Time required to create scenarios: Medium to High. 6. **Run the model** @@ -106,11 +106,11 @@ This high-level tutorial gives you an idea of the main activities involved with - See the :ref:`running-models` section of this chapter for more information. - Time required: Low to Medium, depending on size and complexity of the input data, and which model is being run. More time is required for high-resolution data and/or large areas of interest. 7. **Examine model results** - - Critically examine the results using GIS software - do the patterns and values make sense? + - Critically examine the results using GIS software - do the patterns and values make sense? - Unexplained patterns or abnormally high or low values might indicate problems with the input data. - Time requirement: Low to Medium. 8. **Calibrate the model** - - Optional, not necessarily required for a high-level screening analysis and/or if field data is not available for validation. 
+ - Optional, not necessarily required for a high-level screening analysis and/or if field data is not available for validation. - However, it is important to do calibration if valuation is being done using the model results. - Collect and process observed data corresponding to the InVEST model output of interest. For example, sediment load from a monitoring station in a reservoir intake. - Adjust model inputs to produce an agreement between modeled results and observed data. @@ -131,7 +131,7 @@ This high-level tutorial gives you an idea of the main activities involved with - Post-processing may also be done with the results. For example, you may show a map of protected areas overlaid on an InVEST result map. Or combine multiple ecosystem service maps into a "hotspot" map of greatest combined service provision. Or aggregate results within an area of interest. - Remember to choose your colors and symbols to be easy to read by a wide audience, intuitive to interpret, and such that they accurately represent the results. - Time required: Low to Medium, depending on the complexity of project needs. - + GIS Skills ========== @@ -142,27 +142,27 @@ This User Guide assumes that you have the required GIS skills, it does not gener Here are some examples of the types of GIS tasks that are typically part of an InVEST analysis. 
This is not a comprehensive list: -+ View and navigate raster, vector and tabular data - ++ View and navigate raster, vector and tabular data + + Symbolize raster and vector data - + + Reproject data layers to a common coordinate system - + + Clip data layers to an area of interest - + + Convert vector to raster and raster to vector - + + Create new point, line or polygon layers - + + Edit vector attribute tables - + + Perform a variety of raster math - + + Reclassify raster values - + + Resample rasters -Also see the :ref:`working-with-the-DEM` section of this chapter, which does provide some detail regarding GIS processing of digital elevation model (DEM) data for use in the SDR, NDR, Seasonal Water Yield, Scenic Quality and Coastal Vulnerability models. +Also see the :ref:`working-with-the-DEM` section of this chapter, which does provide some detail regarding GIS processing of digital elevation model (DEM) data for use in the SDR, NDR, Seasonal Water Yield, Scenic Quality and Coastal Vulnerability models. Standalone InVEST Tools @@ -346,15 +346,15 @@ Here are some tips for working with the DEM and creating a hydrologically-correc The stream network generated by the model from the DEM should closely match the streams on a known correct stream map. Several of the InVEST hydrology models and the supporting InVEST tool RouteDEM output a stream network (usually called *stream.tif*.) These tools create streams by first generating a Flow Accumulation raster, then applying the user input 'threshold flow accumulation' (TFA) value to select pixels that should be part of the stream network. For example, if a TFA value of 1000 is given, this says that 1000 pixels must drain into a particular pixel before it's considered part of a stream. This is the equivalent of saying that streams are defined by having a flow accumulation value >= 1000. 
- Use these *stream.tif* outputs to evaluate how well the modelled streams match reality, and adjust the threshold flow accumulation accordingly. Larger values of TFA will produce coarser stream networks with fewer tributaries, smaller values of TFA will produce more tributaries. There is no one "correct" value for TFA, it will be different for each area of interest and DEM. A good value to start with for testing is 1000. When comparing *stream.tif* with a real-world stream map, check that you have the appropriate granularity of tributaries, and make sure that the *stream.tif* streams are continuous, not chopped in disconnected segments or individual pixels. If the modeled streams are discontinuous, try doing another Fill on the DEM, and make sure that you used BILINEAR or CUBIC resampling method for reprojecting. If a DEM does not make continuous streams, then we advise trying another source of elevation data, there are several globally-available sources, and they each perform differently in different places in the world. + Use these *stream.tif* outputs to evaluate how well the modelled streams match reality, and adjust the threshold flow accumulation accordingly. Larger values of TFA will produce coarser stream networks with fewer tributaries, smaller values of TFA will produce more tributaries. There is no one "correct" value for TFA, it will be different for each area of interest and DEM. A good value to start with for testing is 1000. When comparing *stream.tif* with a real-world stream map, check that you have the appropriate granularity of tributaries, and make sure that the *stream.tif* streams are continuous, not chopped in disconnected segments or individual pixels. If the modeled streams are discontinuous, try doing another Fill on the DEM, and make sure that you used BILINEAR or CUBIC resampling method for reprojecting. 
If a DEM does not make continuous streams, then we advise trying another source of elevation data, there are several globally-available sources, and they each perform differently in different places in the world. - To create flow accumulation and stream maps without needing to run a whole hydrology model, you can use the InVEST tool `RouteDEM `_, which is specifically for processing the DEM. See the `RouteDEM chapter of the User Guide `_ for more information. + To create flow accumulation and stream maps without needing to run a whole hydrology model, you can use the InVEST tool RouteDEM, which is specifically for processing the DEM. See the :ref:`RouteDEM page ` for more information. + **Creating watersheds** It is recommended to create watersheds from the DEM that you will be using in the analysis. If a watershed map is obtained from elsewhere, the boundaries of the watershed(s) might not line up correctly with the hydrology created from the DEM, leading to incorrect aggregated results. - There are a variety of tools that can create watersheds, including the ArcGIS Watershed tool and QGIS Watershed basins or r.basins.fill. InVEST also provides a tool called `DelineateIt `_, which works well, is simple to use, and is recommended. It has the advantage of being able to create watersheds that overlap, such as when there are several dams along the same river. See the `DelineateIt section of the User Guide `_ for more information. + There are a variety of tools that can create watersheds, including the ArcGIS Watershed tool and QGIS Watershed basins or r.basins.fill. InVEST also provides a tool called DelineateIt, which works well, is simple to use, and is recommended. It has the advantage of being able to create watersheds that overlap, such as when there are several dams along the same river. See the :ref:`DelineateIt page ` for more information. 
After watersheds are generated, verify that they represent the catchments correctly and that each watershed is assigned a unique integer ID in the field "ws_id" (or "subws_id", depending on the model - see the Data Needs section of the hydrology model you're using to find out what's required.) diff --git a/source/index.rst b/source/index.rst index 2d17c1ee..49441dc1 100644 --- a/source/index.rst +++ b/source/index.rst @@ -45,6 +45,7 @@ Introduction and Getting Started the_need_for getting_started + input_types .. toctree:: :maxdepth: 1 diff --git a/source/input_types.rst b/source/input_types.rst new file mode 100644 index 00000000..e50d73f3 --- /dev/null +++ b/source/input_types.rst @@ -0,0 +1,199 @@ +*********** +Input Types +*********** + +InVEST models accept many different types of spatial and non-spatial data. We categorize them into 12 types: + +.. _number: + +number +------ +A scalar value. You may enter numbers in integer or floating-point format. Fractions are not allowed. + +Units +~~~~~ +Most numeric inputs have units. InVEST models use standard SI units and symbols as much as possible. For definitions of these symbols, see `SI Units `_ and `units accepted for use with SI `_. + +InVEST also uses the non-SI unit *years*. + +When the unit is *currency*, any currency may be used, but the same currency must be used throughout the model. + + +.. _ratio: + +ratio +----- +A unitless proportion in the range 0 - 1, where 0 represents "none" and 1 represents "all". +Some ratio inputs may be less than 0 or greater than 1, while others are strictly limited to the 0-1 range. + +.. _percent: + +percent +------- +A unitless proportion in the range 0 - 100, where 0 represents "none" and 100 represents "all". +Some percent inputs may be less than 0 or greater than 100, while others are strictly limited to the 0-100 range. + +.. _integer: + +integer +------- +A whole number (positive or negative). Integers are often used to uniquely identify something. 
+Because rasters can only store numeric data, it is common to use integer codes as labels that map to non-numeric data +(such as land use/landcover types and soil groups). + +.. _text: + +text +---- +Freeform text. InVEST accepts any Unicode (UTF-8) character. For best results, use Unicode character sets for non-Latin alphabets. + +.. _option: + +option +------ +A choice from a fixed set of options. Options are usually displayed as a dropdown menu. Some CSV inputs have columns that must contain values from a fixed set of options. In that case, be sure to enter the options exactly as they are written. + + +.. _truefalse: + +true/false +---------- +True/false inputs are usually displayed as a checkbox. Some CSV inputs have true/false columns. In that case, the model's documentation will explain exactly how to enter the values (``TRUE`` or ``FALSE``, ``1`` or ``0``, etc.) + + +.. _csv: + +CSV +--- +A table of data stored in a .csv (comma-separated value) format. You may use any spreadsheet software to create and edit a table, and then export it to CSV format. + + +.. _raster: + +raster +------ +A spatial data file that stores data in pixels, which each have a numeric value. +InVEST accepts all GDAL-supported raster formats, including GeoTiff (.tif) and many more. +For the complete list, see https://gdal.org/drivers/raster/index.html. + +Bands +~~~~~ +Rasters may have multiple bands. All InVEST models look at the first band only. If you are using a multi-band raster, +please be sure that the correct dataset is in the first band. + + +.. _datatypes: + +Data types +~~~~~~~~~~ +Every raster has a *data type* which determines the minimum and maximum value that each pixel can have. Some data types allow positive and negative numbers, while others only allow positive numbers. Most of the time you will not need to change your raster's data type, but it is important to be aware of. 
+
+Understanding data types
+^^^^^^^^^^^^^^^^^^^^^^^^
+A raster's data type is displayed as a code in the raster metadata, which may be viewed in your GIS software. A data type code has 3 components:
+
+1. Unsigned marker (**u**), optional
+
+   By default, data types are signed, meaning that they include a sign (+ or -) indicating whether the number is positive or negative.
+   You need a signed data type to store negative data.
+   If a data type begins with a **u**, that means it is unsigned. All unsigned data is positive. If you do not need to store negative data, you can save space by using an unsigned type.
+
+   This distinction only exists for integer data types. Float types are always signed.
+
+2. Type (**float** or **int**)
+
+   Floating-point (float) types can store digits after the decimal point. There is no hard limit on how many decimal places they can store, but they are only accurate to a limited number of total digits (before and after the decimal point).
+   Integer (int) types can only store whole numbers. They have perfect accuracy.
+   It is best to use integer data types when possible for discrete data.
+
+3. Size (**8, 16, 32, 64**)
+
+   This is how many bits are used to store the number. It determines the range of numbers that can fit into the data type.
+   You can save space by using the smallest size that works for your data. For example, the large numbers available in a **float64** raster are rarely needed. The **float32** range is sufficient for most real-world data, and it uses half as much space.
+   The **uint8** type is sufficient for most discrete data that InVEST uses (land use/land cover classes, soil groups, and so on) which have fewer than 256 possible values.
+ +Here are all the standard raster data types and their ranges (ranges include the starting and ending values): + +- **byte** (**uint8**): any integer from 0 to 255 +- **uint16**: any integer from 0 to 65,535 +- **uint32**: any integer from 0 to 4.2x10 :sup:`9` +- **int16**: any integer from -32,768 to 32,767 +- **int32**: any integer from -2.1x10 :sup:`9` to 2.1x10 :sup:`9` +- **float32**: any number from -3.4x10 :sup:`38` to 3.4x10 :sup:`38` (accurate to about 7 digits) + + .. note:: + Larger numbers are accurate to fewer decimal digits. Numbers larger than 2 :sup:`23` (about 8,000,000) use all 7 digits on the left side of the decimal point and cannot have any decimal part at all. Most real-world data has fewer than 7 significant figures so this is not a problem. If you do need decimal precision for such large numbers, use **float64** instead. + +- **float64**: any number from -1.7x10 :sup:`308` to 1.7x10 :sup:`308` (accurate to about 16 digits) + +.. note:: + A **signed byte** type (**int8**, -128 to 127) also exists. ArcGIS in particular may produce signed byte rasters. Because GDAL does not directly support the **int8** type, we recommend avoiding it. InVEST works around this to allow the **int8** type in some cases. If you must use an **int8** raster, it is even more important to check that your intermediate and final results make sense. + + +Nodata values +~~~~~~~~~~~~~ +Rasters may have a *nodata* value that indicates areas where no data exists. Pixels with this value are excluded from calculations. +The nodata value must be encoded in the raster's metadata (otherwise, InVEST won't know what it is). + +Choosing a nodata value +^^^^^^^^^^^^^^^^^^^^^^^ +A good choice of nodata value is well outside the range of real data values, while still being within the raster data type's range. 
+ +-1 is a good choice of nodata value if both of these conditions are met: + +- the data is always non-negative, and +- the raster's data type is signed + +If these conditions are not met, the maximum value for the data type is a good choice. The minimum value may also be used for +signed data types (do not use the minimum value for unsigned types: it is 0, which is usually a valid data value). These are good choices because they are usually much larger or smaller than the range of the valid data, so they will not conflict. +Discrete data is the only exception: for an integer raster such as land use/land cover, you may choose any value in the data type's range that is not a valid data value. + +These recommendations are summarized in the table below. + ++------------------------------------+----------------------------+-------------------------+-----------------------------------------+ +| **Continuous data (float data types)** | **Discrete data (integer data types)** | ++====================================+============================+=========================+=========================================+ +| | **Signed** data type | **Unsigned** data type | Any integer in the data type range | ++------------------------------------+----------------------------+-------------------------+ that is not a valid data value | +| All valid data is **non-negative** | -1 | Data type maximum value | (commonly 0, -1, or the data type | ++------------------------------------+----------------------------+-------------------------+ maximum or minimum) | +| Valid data may be **negative** | Data type maximum or | | | +| | minimum value | | | ++------------------------------------+----------------------------+-------------------------+-----------------------------------------+ + + +Common problems +^^^^^^^^^^^^^^^ +Incorrectly set nodata values are a very common cause of user problems with InVEST. Some common mistakes are: + +- Not setting a nodata value. 
It is common to use a value, like 0 or -1, to represent nodata areas. + If that value is not set in the raster metadata, InVEST will treat it like valid data. + This will cause incorrect results or an error. You must set a nodata value unless every pixel in your raster has valid data (this is uncommon). You can view and edit your raster's metadata, including the nodata value, in your GIS software. + +- Using an unsuitable nodata value. Your nodata value must: + + - be within the range allowed by the raster's :ref:`data type `. Using a value outside this range can cause unexpected results. + - not conflict with real data. Make sure it is well outside the range of possible data values. + + +.. _vector: + +vector +------ +A spatial data file that stores data in geometric shapes, which each may have multiple data attributes. +InVEST accepts all GDAL-supported vector formats, including GeoPackage (.gpkg), ESRI Shapefile (.shp), and many more. +For the complete list, see https://gdal.org/drivers/vector/index.html. + +.. _directory: + +directory +--------- +A folder that may or may not need to contain other files or folders used by the model. + +.. _file: + +file +---- +Any other type of file not listed above. + +