diff --git a/notebooks/c05_Big_Data/Working_with_Big_Data.ipynb b/notebooks/c05_Big_Data/Working_with_Big_Data.ipynb index 904bc2a..b0c8b0a 100644 --- a/notebooks/c05_Big_Data/Working_with_Big_Data.ipynb +++ b/notebooks/c05_Big_Data/Working_with_Big_Data.ipynb @@ -82,9 +82,21 @@ " pid = os.getpid()\n", " mem_bytes = psutil.Process(pid).memory_info().rss\n", " print('[Process {} uses {:.1f}MB]'.format(pid, mem_bytes / 1024 / 1024))\n", - " return mem_bytes / 1024 / 1024\n" + " return mem_bytes / 1024 / 1024" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2020-10-04T05:48:45.857630Z", + "start_time": "2020-10-04T05:48:45.846756Z" + } + }, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 4, @@ -501,8 +513,8 @@ "execution_count": null, "metadata": { "ExecuteTime": { - "end_time": "2020-10-04T05:48:45.857630Z", - "start_time": "2020-10-04T05:48:45.846756Z" + "end_time": "2020-10-04T06:40:44.790440Z", + "start_time": "2020-10-04T06:40:43.466984Z" } }, "outputs": [], @@ -784,12 +796,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-10-04T06:40:44.790440Z", - "start_time": "2020-10-04T06:40:43.466984Z" - } - }, + "metadata": {}, "outputs": [], "source": [] }, @@ -2257,6 +2264,2025 @@ "" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Xarray\n", + "\n", + "Xarray is pandas for N-dimensional data. It also has a [dask backend](http://xarray.pydata.org/en/stable/dask.html)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "ExecuteTime": { + "end_time": "2020-10-12T08:10:18.705068Z", + "start_time": "2020-10-12T08:10:18.539317Z" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
<xarray.Dataset>\n", + "Dimensions: (time: 36, x: 275, y: 205)\n", + "Coordinates:\n", + " * time (time) object 1980-09-16 12:00:00 ... 1983-08-17 00:00:00\n", + " xc (y, x) float64 dask.array<chunksize=(205, 275), meta=np.ndarray>\n", + " yc (y, x) float64 dask.array<chunksize=(205, 275), meta=np.ndarray>\n", + "Dimensions without coordinates: x, y\n", + "Data variables:\n", + " Tair (time, y, x) float64 dask.array<chunksize=(10, 205, 275), meta=np.ndarray>\n", + "Attributes:\n", + " title: /workspace/jhamman/processed/R1002RBRxaaa01a/l...\n", + " institution: U.W.\n", + " source: RACM R1002RBRxaaa01a\n", + " output_frequency: daily\n", + " output_mode: averaged\n", + " convention: CF-1.4\n", + " references: Based on the initial model of Liang et al., 19...\n", + " comment: Output from the Variable Infiltration Capacity...\n", + " nco_openmp_thread_number: 1\n", + " NCO: netCDF Operators version 4.7.9 (Homepage = htt...\n", + " history: Fri Aug 7 17:57:38 2020: ncatted -a bounds,,d...
array([cftime.DatetimeNoLeap(1980, 9, 16, 12, 0, 0, 0, 5, 259),\n", + " cftime.DatetimeNoLeap(1980, 10, 17, 0, 0, 0, 0, 1, 290),\n", + " cftime.DatetimeNoLeap(1980, 11, 16, 12, 0, 0, 0, 3, 320),\n", + " cftime.DatetimeNoLeap(1980, 12, 17, 0, 0, 0, 0, 6, 351),\n", + " cftime.DatetimeNoLeap(1981, 1, 17, 0, 0, 0, 0, 2, 17),\n", + " cftime.DatetimeNoLeap(1981, 2, 15, 12, 0, 0, 0, 3, 46),\n", + " cftime.DatetimeNoLeap(1981, 3, 17, 0, 0, 0, 0, 5, 76),\n", + " cftime.DatetimeNoLeap(1981, 4, 16, 12, 0, 0, 0, 0, 106),\n", + " cftime.DatetimeNoLeap(1981, 5, 17, 0, 0, 0, 0, 3, 137),\n", + " cftime.DatetimeNoLeap(1981, 6, 16, 12, 0, 0, 0, 5, 167),\n", + " cftime.DatetimeNoLeap(1981, 7, 17, 0, 0, 0, 0, 1, 198),\n", + " cftime.DatetimeNoLeap(1981, 8, 17, 0, 0, 0, 0, 4, 229),\n", + " cftime.DatetimeNoLeap(1981, 9, 16, 12, 0, 0, 0, 6, 259),\n", + " cftime.DatetimeNoLeap(1981, 10, 17, 0, 0, 0, 0, 2, 290),\n", + " cftime.DatetimeNoLeap(1981, 11, 16, 12, 0, 0, 0, 4, 320),\n", + " cftime.DatetimeNoLeap(1981, 12, 17, 0, 0, 0, 0, 0, 351),\n", + " cftime.DatetimeNoLeap(1982, 1, 17, 0, 0, 0, 0, 3, 17),\n", + " cftime.DatetimeNoLeap(1982, 2, 15, 12, 0, 0, 0, 4, 46),\n", + " cftime.DatetimeNoLeap(1982, 3, 17, 0, 0, 0, 0, 6, 76),\n", + " cftime.DatetimeNoLeap(1982, 4, 16, 12, 0, 0, 0, 1, 106),\n", + " cftime.DatetimeNoLeap(1982, 5, 17, 0, 0, 0, 0, 4, 137),\n", + " cftime.DatetimeNoLeap(1982, 6, 16, 12, 0, 0, 0, 6, 167),\n", + " cftime.DatetimeNoLeap(1982, 7, 17, 0, 0, 0, 0, 2, 198),\n", + " cftime.DatetimeNoLeap(1982, 8, 17, 0, 0, 0, 0, 5, 229),\n", + " cftime.DatetimeNoLeap(1982, 9, 16, 12, 0, 0, 0, 0, 259),\n", + " cftime.DatetimeNoLeap(1982, 10, 17, 0, 0, 0, 0, 3, 290),\n", + " cftime.DatetimeNoLeap(1982, 11, 16, 12, 0, 0, 0, 5, 320),\n", + " cftime.DatetimeNoLeap(1982, 12, 17, 0, 0, 0, 0, 1, 351),\n", + " cftime.DatetimeNoLeap(1983, 1, 17, 0, 0, 0, 0, 4, 17),\n", + " cftime.DatetimeNoLeap(1983, 2, 15, 12, 0, 0, 0, 5, 46),\n", + " cftime.DatetimeNoLeap(1983, 3, 17, 0, 0, 0, 0, 0, 76),\n", + " cftime.DatetimeNoLeap(1983, 4, 16, 12, 0, 0, 0, 2, 106),\n", + " cftime.DatetimeNoLeap(1983, 5, 17, 0, 0, 0, 0, 5, 137),\n", + " cftime.DatetimeNoLeap(1983, 6, 16, 12, 0, 0, 0, 0, 167),\n", + " cftime.DatetimeNoLeap(1983, 7, 17, 0, 0, 0, 0, 3, 198),\n", + " cftime.DatetimeNoLeap(1983, 8, 17, 0, 0, 0, 0, 6, 229)], dtype=object)
\n",
+ "
| \n",
+ "\n", + "\n", + " | \n", + "
\n",
+ "
| \n",
+ "\n", + "\n", + " | \n", + "
\n",
+ "
| \n",
+ "\n", + "\n", + " | \n", + "
<xarray.Dataset>\n", + "Dimensions: (x: 275, y: 205)\n", + "Coordinates:\n", + " time object 1981-07-17 00:00:00\n", + " xc (y, x) float64 dask.array<chunksize=(205, 275), meta=np.ndarray>\n", + " yc (y, x) float64 dask.array<chunksize=(205, 275), meta=np.ndarray>\n", + "Dimensions without coordinates: x, y\n", + "Data variables:\n", + " Tair (y, x) float64 dask.array<chunksize=(205, 275), meta=np.ndarray>\n", + "Attributes:\n", + " title: /workspace/jhamman/processed/R1002RBRxaaa01a/l...\n", + " institution: U.W.\n", + " source: RACM R1002RBRxaaa01a\n", + " output_frequency: daily\n", + " output_mode: averaged\n", + " convention: CF-1.4\n", + " references: Based on the initial model of Liang et al., 19...\n", + " comment: Output from the Variable Infiltration Capacity...\n", + " nco_openmp_thread_number: 1\n", + " NCO: netCDF Operators version 4.7.9 (Homepage = htt...\n", + " history: Fri Aug 7 17:57:38 2020: ncatted -a bounds,,d...
array(cftime.DatetimeNoLeap(1981, 7, 17, 0, 0, 0, 0, 1, 198), dtype=object)
\n",
+ "
| \n",
+ "\n", + "\n", + " | \n", + "
\n",
+ "
| \n",
+ "\n", + "\n", + " | \n", + "
\n",
+ "
| \n",
+ "\n", + "\n", + " | \n", + "
<xarray.DataArray 'Tair' (time: 4)>\n", + "dask.array<getitem, shape=(4,), dtype=float64, chunksize=(1,), chunktype=numpy.ndarray>\n", + "Coordinates:\n", + " * time (time) object 1980-12-31 00:00:00 ... 1983-12-31 00:00:00\n", + " xc float64 dask.array<chunksize=(), meta=np.ndarray>\n", + " yc float64 dask.array<chunksize=(), meta=np.ndarray>
\n",
+ "
| \n",
+ "\n", + "\n", + " | \n", + "
array([cftime.DatetimeNoLeap(1980, 12, 31, 0, 0, 0, 0, 6, 365),\n", + " cftime.DatetimeNoLeap(1981, 12, 31, 0, 0, 0, 0, 0, 365),\n", + " cftime.DatetimeNoLeap(1982, 12, 31, 0, 0, 0, 0, 1, 365),\n", + " cftime.DatetimeNoLeap(1983, 12, 31, 0, 0, 0, 0, 2, 365)], dtype=object)
\n",
+ "
| \n",
+ "\n", + "\n", + " | \n", + "
\n",
+ "
| \n",
+ "\n", + "\n", + " | \n", + "
<xarray.DataArray 'Tair' (time: 4)>\n", + "array([ 6.75662201, 8.97479849, 10.49235584, 9.59892096])\n", + "Coordinates:\n", + " * time (time) object 1980-12-31 00:00:00 ... 1983-12-31 00:00:00\n", + " xc float64 42.47\n", + " yc float64 44.82
array([ 6.75662201, 8.97479849, 10.49235584, 9.59892096])
array([cftime.DatetimeNoLeap(1980, 12, 31, 0, 0, 0, 0, 6, 365),\n", + " cftime.DatetimeNoLeap(1981, 12, 31, 0, 0, 0, 0, 0, 365),\n", + " cftime.DatetimeNoLeap(1982, 12, 31, 0, 0, 0, 0, 1, 365),\n", + " cftime.DatetimeNoLeap(1983, 12, 31, 0, 0, 0, 0, 2, 365)], dtype=object)
array(42.4748837)
array(44.82000698)