ilyar · ilyar · Apr 25, 2021
diff --git a/.gitignore b/.gitignore
@@ -1 +1,2 @@
 venv
+build
diff --git a/Makefile b/Makefile
@@ -13,12 +13,16 @@ freeze: venv
 	. venv/bin/activate; pip freeze > requirements.txt
 
 test:\
+src/weather\
 src/mnist
 
 clean:
 	rm -rf venv
+	rm -rf build
 
 src/%: venv
+	mkdir -p build
 	. venv/bin/activate; python src/$*.py
 
 mnist: src/mnist
+weather: src/weather
diff --git a/readme.md b/readme.md
@@ -13,6 +13,11 @@
 make mnist
 ```
 
+[Time series forecasting](https://www.tensorflow.org/tutorials/structured_data/time_series)
+
+```shell
+make weather
+```
 
 ## Clean
 

diff --git a/requirements.txt b/requirements.txt
@@ -1,8 +1,11 @@
 absl-py==0.12.0
 astunparse==1.6.3
+backcall==0.2.0
 cachetools==4.2.1
 certifi==2020.12.5
 chardet==4.0.0
+cycler==0.10.0
+decorator==5.0.7
 flatbuffers==1.12
 gast==0.4.0
 google-auth==1.29.0
@@ -11,26 +14,46 @@ google-pasta==0.2.0
 grpcio==1.34.1
 h5py==3.1.0
 idna==2.10
+ipython==7.22.0
+ipython-genutils==0.2.0
+jedi==0.18.0
 keras-nightly==2.5.0.dev2021032900
 Keras-Preprocessing==1.1.2
+kiwisolver==1.3.1
 Markdown==3.3.4
+matplotlib==3.4.1
 numpy==1.19.5
 oauthlib==3.1.0
 opt-einsum==3.3.0
+pandas==1.2.4
+parso==0.8.2
+pexpect==4.8.0
+pickleshare==0.7.5
+Pillow==8.2.0
+prompt-toolkit==3.0.18
 protobuf==3.15.8
+ptyprocess==0.7.0
 pyasn1==0.4.8
 pyasn1-modules==0.2.8
+Pygments==2.8.1
+pyparsing==2.4.7
+python-dateutil==2.8.1
+pytz==2021.1
 requests==2.25.1
 requests-oauthlib==1.3.0
 rsa==4.7.2
+scipy==1.6.2
+seaborn==0.11.1
 six==1.15.0
 tensorboard==2.5.0
 tensorboard-data-server==0.6.0
 tensorboard-plugin-wit==1.8.0
 tensorflow==2.5.0rc1
 tensorflow-estimator==2.5.0rc0
 termcolor==1.1.0
+traitlets==5.0.5
 typing-extensions==3.7.4.3
 urllib3==1.26.4
+wcwidth==0.2.5
 Werkzeug==1.0.1
 wrapt==1.12.1
diff --git a/src/util/baseline.py b/src/util/baseline.py
@@ -0,0 +1,22 @@
+import tensorflow as tf
+
+
+class Baseline(tf.keras.Model):
+    """Model that defines no layers and returns (part of) its inputs.
+
+    With ``label_index`` set, only that entry of the third axis is returned,
+    with a trailing axis re-added; otherwise the inputs pass through as-is.
+    """
+
+    def __init__(self, label_index=None):
+        super().__init__()
+        # Index into the third axis of the inputs; None selects everything.
+        self.label_index = label_index
+
+    def call(self, inputs):
+        """Return ``inputs`` unchanged, or only its ``label_index`` entry."""
+        if self.label_index is not None:
+            picked = inputs[:, :, self.label_index]
+            # Re-add the axis consumed by the (integer) indexing above.
+            return picked[:, :, tf.newaxis]
+        return inputs
diff --git a/src/util/savefig.py b/src/util/savefig.py
@@ -0,0 +1,36 @@
+import os
+
+from matplotlib import pyplot
+
+# Third ancestor directory of this file (src/util/savefig.py -> project root).
+root_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+# Number of figures saved so far; used to number the output files.
+fig_count = 0
+
+
+def build_path():
+    """Return the path of the ``build`` directory under ``root_path``."""
+    return os.path.join(root_path, 'build')
+
+
+def savefig(plt: pyplot):
+    """Save the current figure of ``plt`` as the next numbered PNG in build/.
+
+    Files are named ``weather_fig<N>.png`` where N counts up from 1 across
+    calls. The figure is closed afterwards and the saved path is printed.
+
+    Args:
+        plt: Object exposing ``savefig`` and ``close``; annotated as the
+            ``matplotlib.pyplot`` module.
+    """
+    global fig_count
+    out_dir = build_path()
+    # The Makefile creates build/ before running scripts, but the directory
+    # is missing when this module is used any other way; create it on demand
+    # instead of failing inside plt.savefig.
+    os.makedirs(out_dir, exist_ok=True)
+    fig_count += 1
+    fig_path = os.path.join(out_dir, f'weather_fig{fig_count}.png')
+    plt.savefig(fig_path, bbox_inches='tight')
+    plt.close()
+    print(f'saved: {fig_path}')
diff --git a/src/util/window_generator.py b/src/util/window_generator.py
@@ -0,0 +1,185 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import tensorflow as tf
+
+
+class WindowGenerator:
+    """Cut time-series data into windows of input rows and label rows.
+
+    Each window covers ``input_width + shift`` consecutive rows: the first
+    ``input_width`` rows are the inputs and the last ``label_width`` rows
+    are the labels.
+    """
+
+    def __init__(self, input_width, label_width, shift,
+                 train_df, val_df, test_df,
+                 label_columns=None):
+        """Store the data and precompute the window slices and indices.
+
+        Args:
+            input_width: Number of leading rows of a window used as inputs.
+            label_width: Number of trailing rows of a window used as labels.
+            shift: Rows added after the inputs to reach the window's end.
+            train_df: Training data; its ``columns`` give the feature order.
+            val_df: Validation data.
+            test_df: Test data.
+            label_columns: Names of the columns kept in the labels, or
+                ``None`` to keep every column.
+        """
+        # Store the raw data.
+        self.example = None
+        self.train_df = train_df
+        self.val_df = val_df
+        self.test_df = test_df
+
+        # Work out the label column indices.
+        self.label_columns = label_columns
+        if label_columns is not None:
+            self.label_columns_indices = {name: i for i, name in
+                                          enumerate(label_columns)}
+        self.column_indices = {name: i for i, name in
+                               enumerate(train_df.columns)}
+
+        # Work out the window parameters.
+        self.input_width = input_width
+        self.label_width = label_width
+        self.shift = shift
+
+        self.total_window_size = input_width + shift
+
+        self.input_slice = slice(0, input_width)
+        self.input_indices = np.arange(self.total_window_size)[self.input_slice]
+
+        self.label_start = self.total_window_size - self.label_width
+        self.labels_slice = slice(self.label_start, None)
+        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]
+
+    def __repr__(self):
+        """Summarize the window size, indices and label column names."""
+        return '\n'.join([
+            f'Total window size: {self.total_window_size}',
+            f'Input indices: {self.input_indices}',
+            f'Label indices: {self.label_indices}',
+            f'Label column name(s): {self.label_columns}'])
+
+    def split_window(self, features):
+        """Split a batch of windows into ``(inputs, labels)``.
+
+        The window slices are applied to the second axis of ``features``.
+        When ``label_columns`` is set, the labels keep only those columns
+        (picked on the last axis), in the order given.
+        """
+        inputs = features[:, self.input_slice, :]
+        labels = features[:, self.labels_slice, :]
+        if self.label_columns is not None:
+            labels = tf.stack(
+                [labels[:, :, self.column_indices[name]] for name in self.label_columns],
+                axis=-1)
+
+        # Slicing doesn't preserve static shape information, so set the shapes
+        # manually. This way the `tf.data.Datasets` are easier to inspect.
+        inputs.set_shape([None, self.input_width, None])
+        labels.set_shape([None, self.label_width, None])
+
+        return inputs, labels
+
+    def plot(self, model=None, plot_col='T (degC)', max_subplots=3):
+        """Plot ``plot_col`` for the first windows of the example batch.
+
+        Args:
+            model: Optional callable applied to the example inputs; its
+                output is drawn as predictions next to the labels.
+            plot_col: Name of the column to draw.
+            max_subplots: Upper bound on the number of windows drawn, one
+                subplot each.
+
+        Returns:
+            The ``matplotlib.pyplot`` module, with the new figure current.
+        """
+        inputs, labels = self.example
+        plt.figure(figsize=(12, 8))
+        plot_col_index = self.column_indices[plot_col]
+        max_n = min(max_subplots, len(inputs))
+        # Filled in on first use so the model runs at most once; it is given
+        # the same inputs for every subplot, so re-running it only repeats work.
+        predictions = None
+        for n in range(max_n):
+            plt.subplot(max_n, 1, n + 1)
+            plt.ylabel(f'{plot_col} [normed]')
+            plt.plot(self.input_indices, inputs[n, :, plot_col_index],
+                     label='Inputs', marker='.', zorder=-10)
+
+            if self.label_columns:
+                label_col_index = self.label_columns_indices.get(plot_col, None)
+            else:
+                label_col_index = plot_col_index
+
+            # plot_col is not one of the label columns: nothing more to draw.
+            if label_col_index is None:
+                continue
+
+            plt.scatter(self.label_indices, labels[n, :, label_col_index],
+                        edgecolors='k', label='Labels', c='#2ca02c', s=64)
+            if model is not None:
+                if predictions is None:
+                    predictions = model(inputs)
+                plt.scatter(self.label_indices, predictions[n, :, label_col_index],
+                            marker='X', edgecolors='k', label='Predictions',
+                            c='#ff7f0e', s=64)
+
+            if n == 0:
+                plt.legend()
+
+        plt.xlabel('Time [h]')
+        return plt
+
+    def make_dataset(self, data):
+        """Build a shuffled, batched dataset of ``(inputs, labels)`` windows.
+
+        ``data`` is converted to a float32 array, cut into windows of
+        ``total_window_size`` rows with stride 1, batched by 32 and mapped
+        through ``split_window``.
+        """
+        data = np.array(data, dtype=np.float32)
+        ds = tf.keras.preprocessing.timeseries_dataset_from_array(
+            data=data,
+            targets=None,
+            sequence_length=self.total_window_size,
+            sequence_stride=1,
+            shuffle=True,
+            batch_size=32, )
+
+        ds = ds.map(self.split_window)
+
+        return ds
+
+    @property
+    def train(self):
+        """Windowed dataset over ``train_df``, rebuilt on each access."""
+        return self.make_dataset(self.train_df)
+
+    @property
+    def val(self):
+        """Windowed dataset over ``val_df``, rebuilt on each access."""
+        return self.make_dataset(self.val_df)
+
+    @property
+    def test(self):
+        """Windowed dataset over ``test_df``, rebuilt on each access."""
+        return self.make_dataset(self.test_df)
+
+    @property
+    def example(self):
+        """Get and cache an example batch of `inputs, labels` for plotting."""
+        result = getattr(self, '_example', None)
+        if result is None:
+            # No example batch was found, so get one from the `.train` dataset
+            result = next(iter(self.train))
+            # And cache it for next time
+            self._example = result
+        return result
+
+    @example.setter
+    def example(self, value):
+        """Replace the cached example batch (``None`` clears the cache)."""
+        self._example = value