From ebbfd97288e234b701be81c1f18cb321a13ea27d Mon Sep 17 00:00:00 2001 From: Amanda Tan Date: Wed, 14 Apr 2021 22:46:19 -0700 Subject: [PATCH] update yml file --- 03_multidim_analysis.ipynb | 635 ++----------------------------------- environment.yml | 1 + 2 files changed, 30 insertions(+), 606 deletions(-) diff --git a/03_multidim_analysis.ipynb b/03_multidim_analysis.ipynb index 837013b..e98c79f 100644 --- a/03_multidim_analysis.ipynb +++ b/03_multidim_analysis.ipynb @@ -10,145 +10,74 @@ "- How do I work with multidimensional data like NetCDF files? \n", "\n", "### Objectives\n", - "- Learn how xarray can change your life" + "- Learn how to use xarray to concisely work with multidimensional data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Xarray is an open source Python package that extends the labeled data functionality of Pandas to N-dimensional array-like datasets. It has a similar API to NumPy and Pandas, and supports both Dask and NumPy arrays." + "### Introduction\n", + "Xarray is an open source Python package that extends the labeled data functionality of Pandas to N-dimensional array-like datasets. It has a similar API to NumPy and Pandas, and supports both Dask and NumPy arrays. \n", + "\n", + "Xarray data structures can store netCDF and GeoTIFF data. This notebook uses xarray to illustrate a simple NDVI calculation from GeoTIFFs. 
" ] }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'xarray'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mxarray\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mxr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'xarray'" - ] - } - ], - "source": [ - "import xarray as xr" - ] - }, - { - "cell_type": "markdown", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "xarray supports direct serialization and IO to several file formats, from simple Pickle files to the more flexible netCDF format (recommended)." + "import os\n", + "import json\n", + "import rasterio\n", + "import requests\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "Xarray data structures can store netCDF, and GeoTiFFs. This notebook uses Xarray to illustrate simple NDVI calculation using GeoTIFF dataset with XArray. We load L1 Landsat 8 image and compute NDVI (Normalized difference vegetation index). " + "import xarray as xr " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Here we explore the same dataset that we used , but use L1 this time" + "#### Recall that we are interested in looking at land use over the State of Pará in Brazil, where extensive logging and illegal deforestation are happening. The Landsat tile we will be looking at is Path 227, Row 065. 
The date for the file we will be accessing is 8 June 2020, and we will extract the NIR band, red band, and metadata file from the AWS S3 bucket\n" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "import os\n", - "import json\n", - "import rasterio\n", - "import requests\n", - "\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Landsat on AWS:\n", - "{'driver': 'GTiff', 'dtype': 'uint16', 'nodata': None, 'width': 7621, 'height': 7761, 'count': 1, 'crs': CRS.from_epsg(32621), 'transform': Affine(30.0, 0.0, 573285.0,\n", - " 0.0, -30.0, -683685.0), 'blockxsize': 512, 'blockysize': 512, 'tiled': True, 'compress': 'deflate', 'interleave': 'band'}\n" - ] - } - ], - "source": [ + "# Open path to file on s3 bucket with rasterio\n", "print('Landsat on AWS:')\n", "filepath = 'http://landsat-pds.s3.amazonaws.com/c1/L8/227/065/LC08_L1TP_227065_20200608_20200626_01_T1/LC08_L1TP_227065_20200608_20200626_01_T1_B4.TIF'\n", "with rasterio.open(filepath) as src:\n", " print(src.profile)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If one used Rasterio, you would read it like below" - ] - }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Decimation factor= 9\n" - ] - }, - { - "data": { - "text/plain": [ - "Text(0, 0.5, 'Row #')" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "# Use the same example image:\n", "date = '2020-06-08'\n", "url = 'http://landsat-pds.s3.amazonaws.com/c1/L8/227/065/LC08_L1TP_227065_20200608_20200626_01_T1/'\n", "redband = 'LC08_L1TP_227065_20200608_20200626_01_T1_B{}.TIF'.format(4)\n", "nirband = 'LC08_L1TP_227065_20200608_20200626_01_T1_B{}.TIF'.format(5)\n", "mtlfile = 'LC08_L1TP_227065_20200608_20200626_01_T1_{}.json'.format('MTL')\n", - "#nirband = 'LC08_L1TP_227065_20200608_20200608_01_RT_B{}.TIF'.format(5)\n", "\n", "with rasterio.open(url+redband) as src:\n", " profile = src.profile\n", @@ -164,51 +93,14 @@ "plt.ylabel('Row #')" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "mtlfile , needed if we need to scale to top of atmosphere reflectance. But in this tutorial, we are going to skip it" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "But, here, we will use Xarray to read these files" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Get the block size for Xarray" - ] - }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "True\n" - ] - }, - { - "data": { - "text/plain": [ - "[(512, 512)]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ + "# Check whether the red band image is tiled and get its block shapes\n", + "\n", "red = rasterio.open(url+redband)\n", "print(red.is_tiled)\n", "red.block_shapes" @@ -223,470 +115,15 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "Show/Hide data repr\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "Show/Hide attributes\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
xarray.DataArray
  • band: 1
  • y: 7761
  • x: 7621
  • dask.array<chunksize=(1, 1024, 1024), meta=np.ndarray>
    \n",
    -       "\n",
    -       "\n",
    -       "\n",
    -       "\n",
    -       "
    \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
    Array Chunk
    Bytes 118.29 MB 2.10 MB
    Shape (1, 7761, 7621) (1, 1024, 1024)
    Count 65 Tasks 64 Chunks
    Type uint16 numpy.ndarray
    \n", - "
    \n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n", - " \n", - " \n", - "\n", - " \n", - " 7621\n", - " 7761\n", - " 1\n", - "\n", - "
    • band
      (band)
      int64
      1
      array([1])
    • y
      (y)
      float64
      -6.837e+05 ... -9.165e+05
      array([-683700., -683730., -683760., ..., -916440., -916470., -916500.])
    • x
      (x)
      float64
      5.733e+05 5.733e+05 ... 8.019e+05
      array([573300., 573330., 573360., ..., 801840., 801870., 801900.])
  • transform :
    (30.0, 0.0, 573285.0, 0.0, -30.0, -683685.0)
    crs :
    +init=epsg:32621
    res :
    (30.0, 30.0)
    is_tiled :
    1
    nodatavals :
    (nan,)
    scales :
    (1.0,)
    offsets :
    (0.0,)
    AREA_OR_POINT :
    Point
" - ], - "text/plain": [ - "\n", - "dask.array, shape=(1, 7761, 7621), dtype=uint16, chunksize=(1, 1024, 1024), chunktype=numpy.ndarray>\n", - "Coordinates:\n", - " * band (band) int64 1\n", - " * y (y) float64 -6.837e+05 -6.837e+05 ... -9.165e+05 -9.165e+05\n", - " * x (x) float64 5.733e+05 5.733e+05 5.734e+05 ... 8.019e+05 8.019e+05\n", - "Attributes:\n", - " transform: (30.0, 0.0, 573285.0, 0.0, -30.0, -683685.0)\n", - " crs: +init=epsg:32621\n", - " res: (30.0, 30.0)\n", - " is_tiled: 1\n", - " nodatavals: (nan,)\n", - " scales: (1.0,)\n", - " offsets: (0.0,)\n", - " AREA_OR_POINT: Point" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "red = xa.open_rasterio(url+redband, chunks={'band': 1, 'x': 1024, 'y': 1024})\n", "nir = xa.open_rasterio(url+nirband, chunks={'band': 1, 'x': 1024, 'y': 1024})\n", "red" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -729,20 +166,6 @@ "plt.axis('equal')\n", "plt.show()" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can see that using Xarrays, arrays calculations become very simple. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/environment.yml b/environment.yml index 9c4abe4..419a916 100644 --- a/environment.yml +++ b/environment.yml @@ -16,6 +16,7 @@ dependencies: - rioxarray - geopandas - shapely + - dask - pip: - mimesis \ No newline at end of file