Skip to content

Commit

Permalink
Broken tutorial (#162)
Browse files Browse the repository at this point in the history
* extract method build_loader

* add tests to cover replace_dir behaviour

* black box testing to understand build_loader method

* add test to reproduce issue

* began to fix problem

* add test case to understand parser syntax

* improved config.py docstrings

* test driven rules for combining patterns with directories

* refactor functions and test suite

* separate responsibilities between locator and loader constructors

* moved Loader construction logic into LoaderBuilder class

* extract loader_builder module

* simplify design using classmethods instead of builder pattern

* add export to support forest.Loader on import

* inline staticmethod and add docstrings

* add suggestions to tutorial

* change start.rst to match tutorial files

* adjust dates to make tutorial easier

* roll version forward to 0.4.3
  • Loading branch information
andrewgryan authored Oct 8, 2019
1 parent b02be26 commit 6e9e316
Show file tree
Hide file tree
Showing 13 changed files with 588 additions and 73 deletions.
23 changes: 15 additions & 8 deletions doc/source/start.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,25 +92,32 @@ nice if we could store our settings and use them in a reproducible way!
Example - Multiple data sources
-------------------------------

Open up `config.yml` for an example of the settings that can be adjusted
Open up `multi-config.yaml` for an example of the settings that can be adjusted
to suit your particular use case.

.. code-block:: yaml
files:
- label: UM
pattern: unified_model*.nc
locator: file_system
- label: RDT
pattern: rdt*.json
pattern: "unified_model*.nc"
locator: file_system
file_type: unified_model
- label: EIDA50
pattern: eida50*.nc
pattern: "eida50*.nc"
locator: file_system
file_type: eida50
- label: RDT
pattern: "rdt*.json"
locator: file_system
file_type: rdt
Running the following command should load FOREST with a model diagnostic,
satellite image and derived polygon product at the same time that can be
simultaneously compared
simultaneously compared.

.. code-block:: bash
:> forest --show --config-file multi-config.yaml
Example - Going faster with SQL
-------------------------------
Expand All @@ -125,7 +132,7 @@ files and indices.

.. code-block:: sh
:> forest --show --config-file config.yml --database database.db
:> forest --show --config-file um-config.yaml --database database.db
To generate a database from scratch use the `forestdb` command.

Expand Down
7 changes: 6 additions & 1 deletion forest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@
.. automodule:: forest.keys
.. automodule:: forest.config
.. automodule:: forest.load
"""
__version__ = '0.4.2'
__version__ = '0.4.3'

from .config import *
from . import (
Expand All @@ -19,3 +23,4 @@
tutorial)
from .db import Database
from .keys import *
from .load import *
84 changes: 83 additions & 1 deletion forest/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,25 @@
"""Application configuration"""
"""
Configure application
---------------------
This module implements parsers and data structures
needed to configure the application. It supports
richer settings than those that can be easily
represented on the command line by leveraging file formats
such as YAML and JSON that are widely used to configure
applications.
.. autoclass:: Config
:members:
.. autoclass:: FileGroup
:members:
.. autofunction:: load_config
.. autofunction:: from_files
"""
import os
import yaml
from forest.export import export
Expand All @@ -8,6 +29,20 @@


class Config(object):
"""Configuration data structure
This high-level object represents the application configuration.
It is file format agnostic but has helper methods to initialise
itself from disk or memory.
.. note:: This class is intended to provide the top-level
configuration with low-level details implemented
by specialist classes, e.g. :class:`FileGroup`
which contains meta-data for files
:param data: native Python data structure representing application
settings
"""
def __init__(self, data):
self.data = data

Expand All @@ -25,6 +60,32 @@ def patterns(self):

@classmethod
def load(cls, path):
"""Parse settings from either YAML or JSON file on disk
The configuration can be controlled elegantly
through a text file. Groups of files can
be specified in a list.
.. note:: A directory is treated as absolute when it starts
with a leading /, otherwise it is treated as relative
.. code-block:: yaml
files:
- label: Trial
pattern: "*.nc"
directory: trial/output
- label: Control
pattern: "*.nc"
directory: control/output
- label: RDT
pattern: "*.json"
directory: /satellite/rdt/json
file_type: rdt
:param path: JSON/YAML file to load
:returns: instance of :class:`Config`
"""
with open(path) as stream:
try:
# PyYaml 5.1 onwards
Expand All @@ -35,6 +96,12 @@ def load(cls, path):

@classmethod
def from_files(cls, files, file_type="unified_model"):
"""Configure using list of file names and a file type
:param files: list of file names
:param file_type: keyword to apply to all files
:returns: instance of :class:`Config`
"""
return cls({
"files": [dict(pattern=f, label=f, file_type=file_type)
for f in files]})
Expand All @@ -46,6 +113,19 @@ def file_groups(self):


class FileGroup(object):
"""Meta-data needed to describe group of files
To describe a collection of related files extra
meta-data is needed. For example, the type of data
contained within the files or how data is catalogued
and searched.
:param label: description used by buttons and tooltips
:param pattern: wildcard pattern used by either SQL or glob
:param locator: keyword describing search method (default: 'file_system')
:param file_type: keyword describing file contents (default: 'unified_model')
:param directory: leaf/absolute directory where file(s) are stored (default: None)
"""
def __init__(self,
label,
pattern,
Expand Down Expand Up @@ -98,9 +178,11 @@ def _str(value):

@export
def load_config(path):
    """Load configuration from a file

    Thin module-level wrapper around :meth:`Config.load`.

    :param path: file passed unchanged to :meth:`Config.load`
    :returns: the value returned by :meth:`Config.load`
    """
    config = Config.load(path)
    return config


@export
def from_files(files, file_type="unified_model"):
    """Define configuration with a list of files

    Thin module-level wrapper around :meth:`Config.from_files`. The
    ``file_type`` default mirrors the default of the classmethod so
    both entry points can be called with just a list of files.

    :param files: list of file names
    :param file_type: keyword to apply to all files
        (default: 'unified_model')
    :returns: the value returned by :meth:`Config.from_files`
    """
    return Config.from_files(files, file_type)
13 changes: 0 additions & 13 deletions forest/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,19 +83,6 @@ def add_loader(name, loader):
LOADERS[name] = loader


def file_loader(file_type, pattern):
    """Construct the loader that matches a ``file_type`` keyword

    The keyword is matched case-insensitively against the supported
    types: ``rdt``, ``gpm``, ``earthnetworks`` and ``eida50``.

    :param file_type: keyword describing file contents
    :param pattern: value passed unchanged to the loader constructor
    :returns: loader instance for the requested file type
    :raises Exception: if ``file_type`` is not a supported keyword
    """
    # Each constructor is wrapped in a lambda so that only the selected
    # loader is looked up and instantiated, as in an if/elif chain.
    builders = {
        'rdt': lambda: rdt.Loader(pattern),
        'gpm': lambda: GPM(pattern),
        'earthnetworks': lambda: earth_networks.Loader(pattern),
        'eida50': lambda: satellite.EIDA50(pattern),
    }
    # Normalise the keyword once instead of on every comparison
    build = builders.get(file_type.lower())
    if build is None:
        # Report the keyword exactly as the caller supplied it
        raise Exception("unrecognised file_type: {}".format(file_type))
    return build()


def load_coastlines():
    """Load coastline geometries from cartopy

    The geometries of ``cartopy.feature.COASTLINE`` are passed through
    ``iterlines`` and the result is handed to ``xs_ys``.

    :returns: the value returned by ``xs_ys``
    """
    geometries = cartopy.feature.COASTLINE.geometries()
    lines = iterlines(geometries)
    return xs_ys(lines)
Expand Down
9 changes: 8 additions & 1 deletion forest/earth_networks.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os
import glob
import datetime as dt
import pandas as pd
from forest import geo
Expand Down Expand Up @@ -53,7 +55,12 @@ def add_figure(self, figure):
class Loader(object):
def __init__(self, paths):
self.paths = paths
self.frame = self.read(paths)
if len(self.paths) > 0:
self.frame = self.read(paths)

@classmethod
def pattern(cls, text):
return cls(list(sorted(glob.glob(os.path.expanduser(text)))))

def load_date(self, date):
frame = self.frame.set_index('date')
Expand Down
Loading

0 comments on commit 6e9e316

Please sign in to comment.