From f1b5b7243e5e213e3912a5b4f7ef387bbd2eeb0a Mon Sep 17 00:00:00 2001 From: Benjamin Stewart Date: Fri, 19 Aug 2022 09:51:05 -0400 Subject: [PATCH] updated zonal stats --- .../JUP_SURGP_GLO_B_D__LEI_Evaluation.ipynb | 20 +- .../Mapping_Informality/Untitled.ipynb | 88 ++ ...URB_SCAUR_UKR_B_I_urbanizationReview.ipynb | 1227 ++++++++++++++ .../URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb | 1404 ----------------- .../README.md | 0 .../URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb | 1369 ++++++++++++++++ .../novelUrbanization.py | 348 ++++ Notebooks/Tutorials/LEI_Example.ipynb | 337 ++-- Notebooks/URB_DECAT_ExtractByISO3.ipynb | 261 +++ src/GOST_Urban/LEI.py | 167 ++ src/{ => GOST_Urban}/UrbanRaster.py | 0 src/{ => GOST_Urban}/WSF/__init__.py | 0 src/{ => GOST_Urban}/WSF/wsfdata.py | 0 src/{ => GOST_Urban}/__init__.py | 0 src/{ => GOST_Urban}/urban_helper.py | 88 +- 15 files changed, 3657 insertions(+), 1652 deletions(-) create mode 100644 Notebooks/Implementations/Mapping_Informality/Untitled.ipynb create mode 100644 Notebooks/Implementations/URB_SCAUR_UKR_B_I_urbanizationReview/URB_SCAUR_UKR_B_I_urbanizationReview.ipynb delete mode 100755 Notebooks/Implementations/URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb rename Notebooks/Implementations/{ => URB_SEAU1_NovelUrbanization}/README.md (100%) create mode 100755 Notebooks/Implementations/URB_SEAU1_NovelUrbanization/URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb create mode 100755 Notebooks/Implementations/URB_SEAU1_NovelUrbanization/novelUrbanization.py create mode 100644 Notebooks/URB_DECAT_ExtractByISO3.ipynb create mode 100755 src/GOST_Urban/LEI.py rename src/{ => GOST_Urban}/UrbanRaster.py (100%) rename src/{ => GOST_Urban}/WSF/__init__.py (100%) rename src/{ => GOST_Urban}/WSF/wsfdata.py (100%) rename src/{ => GOST_Urban}/__init__.py (100%) rename src/{ => GOST_Urban}/urban_helper.py (87%) diff --git a/Notebooks/Implementations/JUP_SURGP_GLO_B_D__LEI_Evaluation.ipynb b/Notebooks/Implementations/JUP_SURGP_GLO_B_D__LEI_Evaluation.ipynb index 3863da5..2f37ee0 100755 --- a/Notebooks/Implementations/JUP_SURGP_GLO_B_D__LEI_Evaluation.ipynb +++ b/Notebooks/Implementations/JUP_SURGP_GLO_B_D__LEI_Evaluation.ipynb @@ -11,9 +11,21 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'src.LEI'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_81457/3673388608.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;31m#Import GOST urban functions\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"../../\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0msrc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLEI\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'src.LEI'" + ] + } + ], "source": [ "import os, sys, logging\n", "\n", @@ -449,7 +461,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.1" + "version": "3.9.7" } }, "nbformat": 4, diff --git a/Notebooks/Implementations/Mapping_Informality/Untitled.ipynb b/Notebooks/Implementations/Mapping_Informality/Untitled.ipynb new file mode 100644 index 0000000..8d09095 --- /dev/null +++ b/Notebooks/Implementations/Mapping_Informality/Untitled.ipynb @@ -0,0 +1,88 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/wb411133/.local/lib/python3.7/site-packages/geopandas/_compat.py:110: UserWarning: The Shapely GEOS version (3.8.0-CAPI-1.13.1 ) is incompatible with the GEOS version PyGEOS was compiled with (3.9.0-CAPI-1.16.2). Conversions between both will be slow.\n", + " shapely_geos_version, geos_capi_version_string\n" + ] + } + ], + "source": [ + "import sys, os, importlib\n", + "import rasterio, geojson\n", + "\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "\n", + "from shapely.geometry import box" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "aoi_file = \"/home/public/Data/COUNTRY/BDI/Bujumbura_AOI.geojson\"\n", + "buildings_master = \"/home/public/Data/COUNTRY/BDI/WP_Builidngs_Stats/DA/AFRICA_BURUNDI_building_32735.shp\"\n", + "sel_buildings_file = \"/home/wb411133/data/Country/BDI/WP_Builidngs_Stats/DA/Bujumbura_buildings.shp\"\n", + "if not os.path.exists(os.path.dirname(sel_buildings_file)):\n", + " os.makedirs(os.path.dirname(sel_buildings_file))\n", + "inAOI = gpd.read_file(aoi_file)\n", + "all_buildings = gpd.read_file(buildings_master)\n", + "inAOI = inAOI.to_crs(all_buildings.crs)\n", + "sIndex = all_buildings.sindex" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "if not os.path.exists(sel_buildings_file):\n", + " sel_idx = list(sIndex.intersection(inAOI.total_bounds))\n", + " possible_buildings = all_buildings.loc[sel_idx]\n", + " sel_buildings = possible_buildings.loc[possible_buildings.intersects(inAOI.unary_union)]\n", + " sel_buildings.to_file(sel_buildings_file)\n", + "else:\n", + " sel_buildings = gpd.read_file(sel_buildings_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (geog)", + "language": "python", + "name": "geog" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Notebooks/Implementations/URB_SCAUR_UKR_B_I_urbanizationReview/URB_SCAUR_UKR_B_I_urbanizationReview.ipynb b/Notebooks/Implementations/URB_SCAUR_UKR_B_I_urbanizationReview/URB_SCAUR_UKR_B_I_urbanizationReview.ipynb new file mode 100644 index 0000000..1e1eb0d --- /dev/null +++ b/Notebooks/Implementations/URB_SCAUR_UKR_B_I_urbanizationReview/URB_SCAUR_UKR_B_I_urbanizationReview.ipynb @@ -0,0 +1,1227 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 195, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "import sys, os, json\n", + "import rasterio, geopy\n", + "\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "\n", + "sys.path.insert(0, \"../../../../gostrocks/src\")\n", + "\n", + "import GOSTRocks.rasterMisc as rMisc\n", + "import GOSTRocks.ntlMisc as ntl\n", + "from GOSTRocks.misc import tPrint\n", + "\n", + "sys.path.append(\"../../../src\")\n", + "\n", + "import GOST_Urban.UrbanRaster as urban\n", + "import GOST_Urban.urban_helper as clippy\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "# read in local important parameters\n", + "local_json = \"/home/wb411133/Code/urbanParameters.json\"\n", + "with open(local_json, 'r') as inJ:\n", + " important_vars = json.load(inJ)" + ] + }, + { + "cell_type": "code", + "execution_count": 204, + "metadata": {}, + "outputs": [], + "source": [ + "iso3 = \"UKR\"\n", + "output_dir = f\"/home/wb411133/data/Projects/{iso3}_Urbanization\"\n", + "if not os.path.exists(output_dir):\n", + " os.makedirs(output_dir)\n", + " \n", + "population_file = f\"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/MOSAIC_ppp_prj_2020/ppp_prj_2020_{iso3}.tif\"\n", + "admin_bounds = \"/home/public/Data/COUNTRY/UKR/ADMIN/geoBoundaries-UKR-ADM3.geojson\"\n", + "GHSL_file = \"/home/public/Data/GLOBAL/GHSL/ghsl.vrt\"\n", + "\n", + "# Define output files\n", + "urban_extents_file = os.path.join(output_dir, f\"{iso3}_urban_extents.geojson\")\n", + "urban_extents_raster_file = os.path.join(output_dir, f\"{iso3}_urban_extents.tif\")\n", + "urban_extents_hd_file = os.path.join(output_dir, f\"{iso3}_urban_extents_hd.geojson\")\n", + "urban_extents_hd_raster_file = os.path.join(output_dir, f\"{iso3}_urban_extents_hd.tif\")\n", + "admin_urban_summary = os.path.join(output_dir, \"adm3_urban_summary.shp\")\n", + "urban_admin_summary = os.path.join(output_dir, f\"{iso3}_ADM3_urban_summary.csv\")\n", + "\n", + "final_folder = os.path.join(output_dir, \"Mapping_Data\")\n", + "if not os.path.exists(final_folder):\n", + " os.makedirs(final_folder)\n", + " \n", + "admin_final = os.path.join(final_folder, \"admin_summarized.shp\")\n", + "urban_final = os.path.join(final_folder, \"urban_summarized.shp\")\n", + "urban_hd_final = os.path.join(final_folder, \"urban_hd_summarized.shp\")\n", + "focal_cities = os.path.join(final_folder, \"FOCAL_AOIs.shp\")" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "inAdmin = gpd.read_file(admin_bounds)\n", + "inP = rasterio.open(population_file)\n", + "inG = rasterio.open(GHSL_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run urbanization analysis\n", + "1. Create urban extents \n", + "2. Calculate urban population in admin bounds \n", + "3. Summarize nighttime lights in extents and admin\n", + "4. Summarize GHSL in extents and admin" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 1. Create urban extents\n", + "if not os.path.exists(urban_extents_file):\n", + " urban_calculator = urban.urbanGriddedPop(inP)\n", + " urban_extents = urban_calculator.calculateUrban(densVal=3, totalPopThresh=5000, \n", + " smooth=False, queen=False,\n", + " verbose=True, raster=urban_extents_raster_file)\n", + " urban_extents_hd = urban_calculator.calculateUrban(densVal=15, totalPopThresh=50000, \n", + " smooth=True, queen=False,\n", + " verbose=True, raster=, raster=urban_extents_raster_file)\n", + " urban_extents.to_file(urban_extents_file, driver=\"GeoJSON\")\n", + " urban_extents_hd.to_file(urban_extents_hd_file, driver=\"GeoJSON\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 184, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['AlgoliaPlaces',\n", + " 'ArcGIS',\n", + " 'AzureMaps',\n", + " 'BANFrance',\n", + " 'Baidu',\n", + " 'BaiduV3',\n", + " 'Bing',\n", + " 'DataBC',\n", + " 'GeoNames',\n", + " 'GeocodeEarth',\n", + " 'GeocodeFarm',\n", + " 'GeocoderNotFound',\n", + " 'Geolake',\n", + " 'GoogleV3',\n", + " 'Here',\n", + " 'IGNFrance',\n", + " 'LiveAddress',\n", + " 'MapBox',\n", + " 'MapQuest',\n", + " 'MapTiler',\n", + " 'Nominatim',\n", + " 'OpenCage',\n", + " 'OpenMapQuest',\n", + " 'Pelias',\n", + " 'Photon',\n", + " 'PickPoint',\n", + " 'SERVICE_TO_GEOCODER',\n", + " 'TomTom',\n", + " 'What3Words',\n", + " 'Yandex',\n", + " '__all__',\n", + " '__builtins__',\n", + " '__cached__',\n", + " '__doc__',\n", + " '__file__',\n", + " '__loader__',\n", + " '__name__',\n", + " '__package__',\n", + " '__path__',\n", + " '__spec__',\n", + " 'algolia',\n", + " 'arcgis',\n", + " 'azure',\n", + " 'baidu',\n", + " 'banfrance',\n", + " 'base',\n", + " 'bing',\n", + " 'databc',\n", + " 'geocodeearth',\n", + " 'geocodefarm',\n", + " 'geolake',\n", + " 'geonames',\n", + " 'get_geocoder_for_service',\n", + " 'googlev3',\n", + " 'here',\n", + " 'ignfrance',\n", + " 'mapbox',\n", + " 'mapquest',\n", + " 'maptiler',\n", + " 'nominatim',\n", + " 'opencage',\n", + " 'openmapquest',\n", + " 'options',\n", + " 'pelias',\n", + " 'photon',\n", + " 'pickpoint',\n", + " 'smartystreets',\n", + " 'tomtom',\n", + " 'what3words',\n", + " 'yandex']" + ] + }, + "execution_count": 184, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dir(geopy.geocoders)" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "# 2. Calculate urban population in admin areas\n", + "pop_worker = clippy.summarize_population(population_file, gpd.read_file(admin_bounds), urban_extents_raster_file, urban_extents_hd_raster_file)\n", + "summarized_urban = pop_worker.calculate_zonal()\n", + "urban_res = summarized_urban.loc[:,[x for x in summarized_urban.columns if \"SUM\" in x]]\n", + "urban_res.columns = ['TOTAL_POP', \"URBAN_POP\", \"URBAN_HD_POP\"]\n", + "urban_res['shapeID'] = inAdmin['shapeID']\n", + "urban_res['shapeName'] = inAdmin['shapeName']\n", + "urban_res.to_csv(urban_admin_summary)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Completed loop: 0\n" + ] + } + ], + "source": [ + "# 3. Summarize nighttime lights in admin bounds and urban extents\n", + "ntl_files = ntl.find_monthly_ntl()\n", + "\n", + "urbanD = gpd.read_file(urban_extents_file)\n", + "urbanHD = gpd.read_file(urban_extents_hd_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "11:55:56\tProcessing 201204-201303\n", + "11:56:14\tProcessing 2013\n", + "11:56:31\tProcessing 2014\n", + "11:56:48\tProcessing 2015\n", + "11:57:04\tProcessing 2016\n", + "11:57:20\tProcessing 2017\n", + "11:57:36\tProcessing 2018\n", + "11:57:53\tProcessing 2019\n", + "11:58:10\tProcessing 2020\n" + ] + } + ], + "source": [ + "viirs_folder = os.path.join(output_dir, \"NTL_ZONAL_RES\")\n", + "if not os.path.exists(viirs_folder):\n", + " os.makedirs(viirs_folder)\n", + "\n", + "for ntl_file in ntl_files:\n", + " inR = rasterio.open(ntl_file)\n", + " name = os.path.basename(ntl_file).split(\"_\")[3]\n", + " tPrint(\"Processing %s\" % name)\n", + " urban_res_file = os.path.join(viirs_folder, f\"URBAN_{name}.csv\")\n", + " urban_hd_res_file = os.path.join(viirs_folder, f\"HD_URBAN_{name}.csv\")\n", + " admin_res_file = os.path.join(viirs_folder, f\"ADMIN_{name}.csv\")\n", + " \n", + " # Urban Summary\n", + " if not os.path.exists(urban_res_file):\n", + " urban_res = rMisc.zonalStats(urbanD, inR, minVal=0.1)\n", + " col_names = [f'URBAN_{name}_{x}' for x in ['SUM','MIN','MAX','MEAN']]\n", + " urban_df = pd.DataFrame(urban_res, columns=col_names)\n", + " urban_df.to_csv(urban_res_file)\n", + " # HD Urban Summary\n", + " if not os.path.exists(urban_hd_res_file):\n", + " hd_urban_res = rMisc.zonalStats(urbanHD, inR, minVal=0.1)\n", + " col_names = [f'HD_URBAN_{name}_{x}' for x in ['SUM','MIN','MAX','MEAN']]\n", + " hd_urban_df = pd.DataFrame(hd_urban_res, columns=col_names)\n", + " hd_urban_df.to_csv(urban_hd_res_file)\n", + " # admin Summary\n", + " if not os.path.exists(admin_res_file):\n", + " admin_res = rMisc.zonalStats(inAdmin, inR, minVal=0.1)\n", + " col_names = [f'ADM_URBAN_{name}_{x}' for x in ['SUM','MIN','MAX','MEAN']]\n", + " admin_df = pd.DataFrame(admin_res, columns=col_names)\n", + " admin_df.to_csv(admin_res_file)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "# Compile VIIRS results\n", + "urb_files = [x for x in os.listdir(viirs_folder) if x.startswith(\"URBAN\")]\n", + "for x in urb_files:\n", + " tempD = pd.read_csv(os.path.join(viirs_folder, x), index_col=0)\n", + " urbanD[x[:-4]] = tempD.iloc[:,0]\n", + "\n", + "hd_urb_files = [x for x in os.listdir(viirs_folder) if x.startswith(\"HD_URBAN\")]\n", + "for x in hd_urb_files:\n", + " tempD = pd.read_csv(os.path.join(viirs_folder, x), index_col=0)\n", + " urbanHD[x[:-4]] = tempD.iloc[:,0]\n", + " \n", + "admin_urb_files = [x for x in os.listdir(viirs_folder) if x.startswith(\"ADMIN\")]\n", + "for x in admin_urb_files:\n", + " tempD = pd.read_csv(os.path.join(viirs_folder, x), index_col=0)\n", + " inAdmin[x[:-4]] = tempD.iloc[:,0]\n", + "\n", + "urbanD.to_csv(urban_viirs_summary)\n", + "urbanHD.to_csv(urban_hd_viirs_summary)\n", + "inAdmin.to_csv(admin_viirs_summary)" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
shapeNameLevelshapeIDshapeGroupshapeTypegeometryurbanPopurbanPop_HDurbanPopHDADMIN_201204-201303ADMIN_2013ADMIN_2014ADMIN_2015ADMIN_2016ADMIN_2017ADMIN_2018ADMIN_2019ADMIN_2020
0TinystivskaADM3UKR-ADM3-10664576B65180807UKRADM3POLYGON ((33.72008 44.71634, 33.72007 44.71683...0.0418010.00.044.87661767.887924116.03918589.41572670.294426151.912277150.744171170.651428216.104889
1UiutnenskaADM3UKR-ADM3-10664576B20681754UKRADM3POLYGON ((33.32283 45.22140, 33.32327 45.22845...0.1745620.00.084.97717397.932007173.487305161.047638151.429688225.306213246.056061280.394592354.656372
2MarfivskaADM3UKR-ADM3-10664576B29349824UKRADM3POLYGON ((36.16669 45.18376, 36.16855 45.22393...0.0000000.00.011.67398739.036156239.736496101.87809852.681526350.950989325.175049369.978394414.872192
3MedvedivskaADM3UKR-ADM3-10664576B32425034UKRADM3MULTIPOLYGON (((34.55497 45.84983, 34.55491 45...0.0000000.00.017.25362845.493160120.73918972.215385128.267792162.026398341.131165285.411011267.982178
4OleksiivskaADM3UKR-ADM3-10664576B19447239UKRADM3POLYGON ((33.75281 45.58381, 33.75240 45.61398...0.0000000.00.01.70366510.310785246.834381112.5723807.670630288.291138276.899536282.907227329.533112
\n", + "
" + ], + "text/plain": [ + " shapeName Level shapeID shapeGroup shapeType \\\n", + "0 Tinystivska ADM3 UKR-ADM3-10664576B65180807 UKR ADM3 \n", + "1 Uiutnenska ADM3 UKR-ADM3-10664576B20681754 UKR ADM3 \n", + "2 Marfivska ADM3 UKR-ADM3-10664576B29349824 UKR ADM3 \n", + "3 Medvedivska ADM3 UKR-ADM3-10664576B32425034 UKR ADM3 \n", + "4 Oleksiivska ADM3 UKR-ADM3-10664576B19447239 UKR ADM3 \n", + "\n", + " geometry urbanPop urbanPop_HD \\\n", + "0 POLYGON ((33.72008 44.71634, 33.72007 44.71683... 0.041801 0.0 \n", + "1 POLYGON ((33.32283 45.22140, 33.32327 45.22845... 0.174562 0.0 \n", + "2 POLYGON ((36.16669 45.18376, 36.16855 45.22393... 0.000000 0.0 \n", + "3 MULTIPOLYGON (((34.55497 45.84983, 34.55491 45... 0.000000 0.0 \n", + "4 POLYGON ((33.75281 45.58381, 33.75240 45.61398... 0.000000 0.0 \n", + "\n", + " urbanPopHD ADMIN_201204-201303 ADMIN_2013 ADMIN_2014 ADMIN_2015 \\\n", + "0 0.0 44.876617 67.887924 116.039185 89.415726 \n", + "1 0.0 84.977173 97.932007 173.487305 161.047638 \n", + "2 0.0 11.673987 39.036156 239.736496 101.878098 \n", + "3 0.0 17.253628 45.493160 120.739189 72.215385 \n", + "4 0.0 1.703665 10.310785 246.834381 112.572380 \n", + "\n", + " ADMIN_2016 ADMIN_2017 ADMIN_2018 ADMIN_2019 ADMIN_2020 \n", + "0 70.294426 151.912277 150.744171 170.651428 216.104889 \n", + "1 151.429688 225.306213 246.056061 280.394592 354.656372 \n", + "2 52.681526 350.950989 325.175049 369.978394 414.872192 \n", + "3 128.267792 162.026398 341.131165 285.411011 267.982178 \n", + "4 7.670630 288.291138 276.899536 282.907227 329.533112 " + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inAdmin.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# 4. Summarize GHSL in extents and admin\n", + "ghsl_cols = [f'c_{x}' for x in [1,2,3,4,5,6]]\n", + "admin_ghsl_summary = os.path.join(output_dir, \"admin_GHSL_summary.csv\")\n", + "urban_ghsl_summary = os.path.join(output_dir, \"urban_GHSL_summary.csv\")\n", + "urbanHD_ghsl_summary = os.path.join(output_dir, \"urbanhd_GHSL_summary.csv\")\n", + "\n", + "if not os.path.exists(admin_ghsl_summary):\n", + " res = rMisc.zonalStats(inAdmin, inG, rastType='C', unqVals = [1,2,3,4,5,6], reProj=True)\n", + " res = pd.DataFrame(res, columns = ghsl_cols)\n", + " res['gID'] = inAdmin['shapeID']\n", + " res.to_csv(admin_ghsl_summary)\n", + " \n", + "if not os.path.exists(urban_ghsl_summary):\n", + " res = rMisc.zonalStats(urbanD, inG, rastType='C', unqVals = [1,2,3,4,5,6], reProj=True)\n", + " res = pd.DataFrame(res, columns = ghsl_cols)\n", + " res['gID'] = urbanD['ID']\n", + " res.to_csv(urban_ghsl_summary)\n", + " \n", + "if not os.path.exists(urbanHD_ghsl_summary):\n", + " res = rMisc.zonalStats(urbanHD, inG, rastType='C', unqVals = [1,2,3,4,5,6], reProj=True)\n", + " res = pd.DataFrame(res, columns = ghsl_cols)\n", + " res['gID'] = urbanHD['ID']\n", + " res.to_csv(urbanHD_ghsl_summary)\n", + " \n", + "for ghsl_file in [admin_ghsl_summary, urban_ghsl_summary, urbanHD_ghsl_summary]:\n", + " adm_ghsl = pd.read_csv(ghsl_file, index_col=0)\n", + " adm_ghsl['b2014'] = adm_ghsl.apply(lambda x: x['c_3'] + x['c_4'] + x['c_5'] + x['c_6'], axis=1)\n", + " adm_ghsl['b2000'] = adm_ghsl.apply(lambda x: x['c_4'] + x['c_5'] + x['c_6'], axis=1)\n", + " adm_ghsl['b1990'] = adm_ghsl.apply(lambda x: x['c_5'] + x['c_6'], axis=1)\n", + " \n", + " def get_built(x):\n", + " cur_built = x['b2014']\n", + " base_built = x['b1990']\n", + " if base_built == 0:\n", + " base_built = x['b2000']\n", + " try:\n", + " return((cur_built - base_built)/base_built)\n", + " except:\n", + " return(-1)\n", + " adm_ghsl['g_14_90'] = adm_ghsl.apply(get_built, axis=1)\n", + " adm_ghsl.to_csv(ghsl_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Compile all results for admin divisions and urban extents" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Compile data\n", + "# [shapefile, population_summary, viirs_summary, ghsl_summary, out_file]\n", + "for cur_def in [\n", + " [admin_bounds, urban_admin_summary, admin_viirs_summary, admin_ghsl_summary, admin_final],\n", + " [urban_extents_file, '', urban_viirs_summary, urban_ghsl_summary, urban_final],\n", + " [urban_extents_hd_file, '', urban_hd_viirs_summary, urbanHD_ghsl_summary, urban_hd_final]\n", + " ]:\n", + " curD = gpd.read_file(cur_def[0])\n", + " if cur_def[1] != '':\n", + " curPop = pd.read_csv(cur_def[1], index_col=0) \n", + " curD['Pop'] = curPop['TOTAL_POP']\n", + " curD['urbanPop'] = curPop.apply(lambda x: x['URBAN_POP']/x['TOTAL_POP'], axis=1)\n", + " curD['urbanPopHD'] = curPop.apply(lambda x: x['URBAN_HD_POP']/x['TOTAL_POP'], axis=1)\n", + " viirsD = pd.read_csv(cur_def[2], index_col=0)\n", + " curD['NTL2013'] = viirsD.iloc[:,-8]\n", + " curD['NTL2020'] = viirsD.iloc[:,-1]\n", + " curD['NTL_g'] = curD.apply(lambda x: (x['NTL2020'] - x['NTL2013'])/x['NTL2013'], axis=1)\n", + " ghslD = pd.read_csv(cur_def[3], index_col=0)\n", + " curD['b2014'] = ghslD['b2014']\n", + " curD['g_14_90'] = ghslD['g_14_90']\n", + " curD.to_file(cur_def[4])\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
shapeNameLevelshapeIDshapeGroupshapeTypePopurbanPopurbanPopHDNTL2013NTL2020NTL_gb2014g_14_90geometry
0TinystivskaADM3UKR-ADM3-10664576B65180807UKRADM33164.4770510.0418010.067.887924216.1048892.18326034870.044638POLYGON ((33.72008 44.71634, 33.71926 44.71593...
1UiutnenskaADM3UKR-ADM3-10664576B20681754UKRADM34659.8154300.1745620.097.932007354.6563722.621455164720.317549POLYGON ((33.32283 45.22140, 33.32233 45.22139...
2MarfivskaADM3UKR-ADM3-10664576B29349824UKRADM32376.8171390.0000000.039.036156414.8721929.6278966360.177778POLYGON ((36.16669 45.18376, 36.12935 45.18249...
3MedvedivskaADM3UKR-ADM3-10664576B32425034UKRADM33065.2070310.0000000.045.493160267.9821784.89060442060.342483MULTIPOLYGON (((34.55497 45.84983, 34.54492 45...
4OleksiivskaADM3UKR-ADM3-10664576B19447239UKRADM31716.4621580.0000000.010.310785329.53311230.96004012910.778237POLYGON ((33.75281 45.58381, 33.71871 45.58878...
\n", + "
" + ], + "text/plain": [ + " shapeName Level shapeID shapeGroup shapeType \\\n", + "0 Tinystivska ADM3 UKR-ADM3-10664576B65180807 UKR ADM3 \n", + "1 Uiutnenska ADM3 UKR-ADM3-10664576B20681754 UKR ADM3 \n", + "2 Marfivska ADM3 UKR-ADM3-10664576B29349824 UKR ADM3 \n", + "3 Medvedivska ADM3 UKR-ADM3-10664576B32425034 UKR ADM3 \n", + "4 Oleksiivska ADM3 UKR-ADM3-10664576B19447239 UKR ADM3 \n", + "\n", + " Pop urbanPop urbanPopHD NTL2013 NTL2020 NTL_g b2014 \\\n", + "0 3164.477051 0.041801 0.0 67.887924 216.104889 2.183260 3487 \n", + "1 4659.815430 0.174562 0.0 97.932007 354.656372 2.621455 16472 \n", + "2 2376.817139 0.000000 0.0 39.036156 414.872192 9.627896 636 \n", + "3 3065.207031 0.000000 0.0 45.493160 267.982178 4.890604 4206 \n", + "4 1716.462158 0.000000 0.0 10.310785 329.533112 30.960040 1291 \n", + "\n", + " g_14_90 geometry \n", + "0 0.044638 POLYGON ((33.72008 44.71634, 33.71926 44.71593... \n", + "1 0.317549 POLYGON ((33.32283 45.22140, 33.32233 45.22139... \n", + "2 0.177778 POLYGON ((36.16669 45.18376, 36.12935 45.18249... \n", + "3 0.342483 MULTIPOLYGON (((34.55497 45.84983, 34.54492 45... \n", + "4 0.778237 POLYGON ((33.75281 45.58381, 33.71871 45.58878... " + ] + }, + "execution_count": 144, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gpd.read_file(admin_final).head()" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDPopNTL2013NTL2020NTL_gb2014g_14_90geometry
0925873396.000000467.266479527.8709720.12970039097.00.150352POLYGON ((33.47958 51.88625, 33.48208 51.88625...
192635969.45605540.43315557.8759540.4313987113.00.262962POLYGON ((24.54208 51.68292, 24.54708 51.68292...
2926432302.546875122.652893239.1934360.95016522658.00.202016POLYGON ((33.88208 51.69208, 33.88625 51.69208...
392699637.30859461.957565119.8945770.93510814085.00.333302POLYGON ((24.96375 51.64042, 24.96458 51.64042...
4927825341.3066415.4669737.4184430.356956369.03.855263POLYGON ((30.59542 51.52375, 30.60042 51.52375...
\n", + "
" + ], + "text/plain": [ + " ID Pop NTL2013 NTL2020 NTL_g b2014 g_14_90 \\\n", + "0 9258 73396.000000 467.266479 527.870972 0.129700 39097.0 0.150352 \n", + "1 9263 5969.456055 40.433155 57.875954 0.431398 7113.0 0.262962 \n", + "2 9264 32302.546875 122.652893 239.193436 0.950165 22658.0 0.202016 \n", + "3 9269 9637.308594 61.957565 119.894577 0.935108 14085.0 0.333302 \n", + "4 9278 25341.306641 5.466973 7.418443 0.356956 369.0 3.855263 \n", + "\n", + " geometry \n", + "0 POLYGON ((33.47958 51.88625, 33.48208 51.88625... \n", + "1 POLYGON ((24.54208 51.68292, 24.54708 51.68292... \n", + "2 POLYGON ((33.88208 51.69208, 33.88625 51.69208... \n", + "3 POLYGON ((24.96375 51.64042, 24.96458 51.64042... \n", + "4 POLYGON ((30.59542 51.52375, 30.60042 51.52375... " + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gpd.read_file(urban_final).head()" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDPopNTL2013NTL2020NTL_gb2014g_14_90geometry
0265752206.855469364.444336419.9535830.15231232926.00.149290POLYGON ((33.45458 51.88125, 33.46125 51.88125...
12658221214.5000001054.1361081941.6391600.841925102841.00.184818POLYGON ((31.25458 51.55042, 31.25875 51.55042...
2265961731.027344291.769470862.1695561.95496852527.00.156320POLYGON ((33.23958 51.25625, 33.24958 51.25625...
32662113739.7031255424.7587894393.543945-0.190094113650.00.065705POLYGON ((34.76625 50.95958, 34.77292 50.95958...
42663148510.1093752359.8652342369.2387700.00397290305.00.238650POLYGON ((25.36708 50.78792, 25.36792 50.78792...
\n", + "
" + ], + "text/plain": [ + " ID Pop NTL2013 NTL2020 NTL_g b2014 \\\n", + "0 2657 52206.855469 364.444336 419.953583 0.152312 32926.0 \n", + "1 2658 221214.500000 1054.136108 1941.639160 0.841925 102841.0 \n", + "2 2659 61731.027344 291.769470 862.169556 1.954968 52527.0 \n", + "3 2662 113739.703125 5424.758789 4393.543945 -0.190094 113650.0 \n", + "4 2663 148510.109375 2359.865234 2369.238770 0.003972 90305.0 \n", + "\n", + " g_14_90 geometry \n", + "0 0.149290 POLYGON ((33.45458 51.88125, 33.46125 51.88125... \n", + "1 0.184818 POLYGON ((31.25458 51.55042, 31.25875 51.55042... \n", + "2 0.156320 POLYGON ((33.23958 51.25625, 33.24958 51.25625... \n", + "3 0.065705 POLYGON ((34.76625 50.95958, 34.77292 50.95958... \n", + "4 0.238650 POLYGON ((25.36708 50.78792, 25.36792 50.78792... " + ] + }, + "execution_count": 146, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gpd.read_file(urban_hd_final).head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Extract sample data for mapping" + ] + }, + { + "cell_type": "code", + "execution_count": 181, + "metadata": {}, + "outputs": [], + "source": [ + "out_ntl_2013 = os.path.join(final_folder, \"VIIRS_2013.tif\")\n", + "out_ntl_2014 = os.path.join(final_folder, \"VIIRS_2014.tif\")\n", + "out_ntl_2020 = os.path.join(final_folder, \"VIIRS_2020.tif\")" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Completed loop: 0\n" + ] + }, + { + "data": { + "text/plain": [ + "['s3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_201204-201303_global_vcmcfg_c202101211500.average.tif',\n", + " 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_2013_global_vcmcfg_c202101211500.average.tif',\n", + " 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_2014_global_vcmslcfg_c202101211500.average.tif',\n", + " 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_2015_global_vcmslcfg_c202101211500.average.tif',\n", + " 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_2016_global_vcmslcfg_c202101211500.average.tif',\n", + " 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_2017_global_vcmslcfg_c202101211500.average.tif',\n", + " 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_2018_global_vcmslcfg_c202101211500.average.tif',\n", + " 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_2019_global_vcmslcfg_c202101211500.average.tif',\n", + " 's3://wbgdecinternal-ntl/NTL/VIIRS/Annual/VIIRS_ANNUAL_EOG/VNL_v2_npp_2020_global_vcmslcfg_c202101211500.average.tif']" + ] + }, + "execution_count": 155, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# extract nighttime lights for 2013 and 2020\n", + "ntl_files = ntl.find_monthly_ntl()\n", + "ntl_files" + ] + }, + { + "cell_type": "code", + "execution_count": 182, + "metadata": {}, + "outputs": [], + "source": [ + "if not os.path.exists(out_ntl_2013):\n", + " rMisc.clipRaster(rasterio.open(ntl_files[1]), inAdmin, out_ntl_2013)\n", + " \n", + "if not os.path.exists(out_ntl_2014):\n", + " rMisc.clipRaster(rasterio.open(ntl_files[2]), inAdmin, out_ntl_2014)\n", + " \n", + "if not os.path.exists(out_ntl_2020):\n", + " rMisc.clipRaster(rasterio.open(ntl_files[-1]), inAdmin, out_ntl_2020)" + ] + }, + { + "cell_type": "code", + "execution_count": 205, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idNamegeometry
01OdessaPOLYGON ((30.25058 46.14250, 30.25996 46.65254...
12ZaporizziaPOLYGON ((34.94663 47.92600, 35.38985 47.93029...
23SumyPOLYGON ((34.68477 50.98500, 34.96624 50.98270...
34PoltavaPOLYGON ((34.36383 49.64435, 34.70744 49.64554...
45HarkivPOLYGON ((35.97394 50.11743, 36.51403 50.11625...
\n", + "
" + ], + "text/plain": [ + " id Name geometry\n", + "0 1 Odessa POLYGON ((30.25058 46.14250, 30.25996 46.65254...\n", + "1 2 Zaporizzia POLYGON ((34.94663 47.92600, 35.38985 47.93029...\n", + "2 3 Sumy POLYGON ((34.68477 50.98500, 34.96624 50.98270...\n", + "3 4 Poltava POLYGON ((34.36383 49.64435, 34.70744 49.64554...\n", + "4 5 Harkiv POLYGON ((35.97394 50.11743, 36.51403 50.11625..." + ] + }, + "execution_count": 205, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Extract ghsl for the 9 focal cities\n", + "in_cities = gpd.read_file(focal_cities)\n", + "in_cities.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 207, + "metadata": {}, + "outputs": [], + "source": [ + "cnt = 0\n", + "max_cnt = 100\n", + "for idx, row in in_cities.iterrows():\n", + " out_file = os.path.join(final_folder, f\"ghsl_{row['Name']}.tif\")\n", + " if not os.path.exists(out_file):\n", + " rMisc.clipRaster(inG, gpd.GeoDataFrame(pd.DataFrame(row).transpose(), geometry='geometry', crs=in_cities.crs), out_file)\n", + " cnt += 1\n", + " if cnt >= max_cnt:\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Earth Engine", + "language": "python", + "name": "ee" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Notebooks/Implementations/URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb b/Notebooks/Implementations/URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb deleted file mode 100755 index dc34a6c..0000000 --- a/Notebooks/Implementations/URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb +++ /dev/null @@ -1,1404 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Extract data for urban calculations\n", - "\n", - "Test input for Tanzania\n", - "\n", - "0. Select focal ADM, buffer by 1km, rasterize as [0, 1]\n", - "1. Download DEM data from ASTER, mosaick\n", - "2. Calculate slope of DEM\n", - "3. Extract water layer from Globcover\n", - "4. Rasterize building footprints\n", - "5. Select population layer\n", - "6. Standardize all rasters to population layer \n", - " a. Set area outside of focal admin to NoData \n", - " b. Set everything to 16bit \n", - " \n", - " \n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/geopandas/_compat.py:88: UserWarning: The Shapely GEOS version (3.7.1-CAPI-1.11.1 0) is incompatible with the GEOS version PyGEOS was compiled with (3.9.0-CAPI-1.16.2). Conversions between both will be slow.\n", - " shapely_geos_version, geos_capi_version_string\n" - ] - } - ], - "source": [ - "import sys, os, importlib, shutil\n", - "import requests\n", - "import rasterio, elevation, richdem\n", - "import rasterio.warp\n", - "from rasterio import features\n", - "\n", - "import pandas as pd\n", - "import geopandas as gpd\n", - "import numpy as np\n", - "\n", - "from shapely.geometry import MultiPolygon, Polygon, box, Point\n", - "\n", - "#Import raster helpers\n", - "sys.path.append(\"../../../gostrocks/src\")\n", - "\n", - "import GOSTRocks.rasterMisc as rMisc\n", - "from GOSTRocks.misc import tPrint\n", - "\n", - "#Import GOST urban functions\n", - "sys.path.append(\"../../\")\n", - "import src.UrbanRaster as urban\n", - "import src.urban_helper as helper\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "global_bounds = \"/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp\"\n", - "global_bounds_adm2 = \"/home/public/Data/GLOBAL/ADMIN/Admin2_Polys.shp\"\n", - "\n", - "inG = gpd.read_file(global_bounds)\n", - "inG2 = gpd.read_file(global_bounds_adm2)\n", - "\n", - "runSmall = True\n", - "runLarge = True" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "importlib.reload(helper)\n", - "importlib.reload(rMisc)\n", - "\n", - "def calculate_urban(iso3, inG, inG2, pop_files, ea_file, km=True, small=True):\n", - " global_landcover = \"/home/public/Data/GLOBAL/LANDCOVER/GLOBCOVER/2015/ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7.tif\"\n", - " global_ghspop = \"/home/public/Data/GLOBAL/Population/GHS/250/GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0.tif\"\n", - " global_ghspop_1k = \"/home/public/Data/GLOBAL/Population/GHS/GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0.tif\"\n", - " global_ghbuilt = \"/home/public/Data/GLOBAL/URBAN/GHS/GHS_1K_BUILT/GHS_BUILT_LDS2014_GLOBE_R2018A_54009_1K_V1_0.tif\"\n", - " global_dem_1k = \"/home/public/Data/GLOBAL/ELEV/noaa_1km.tif\"\n", - " ghs_smod = \"/home/public/Data/GLOBAL/URBAN/GHS/GHS_SMOD/GHS_SMOD_POP2015_GLOBE_R2019A_54009_1K_V2_0.tif\"\n", - " ghsl_vrt = \"/home/public/Data/GLOBAL/GHSL/ghsl.vrt\"\n", - "\n", - " output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - " admin2_250_stats = os.path.join(output_folder, \"URBAN_ADMIN2_STATS_COMPILED.csv\")\n", - " commune_250_stats = os.path.join(output_folder, \"URBAN_COMMUNE_STATS_COMPILED.csv\")\n", - " admin2_1k_stats = os.path.join(output_folder, \"URBAN_ADMIN2_STATS_COMPILED_1k.csv\")\n", - " commune_1k_stats = os.path.join(output_folder, \"URBAN_COMMUNE_STATS_COMPILED_1k.csv\")\n", - " \n", - " inD = inG.loc[inG['ISO3'] == iso3]\n", - " inD['geometry'] = inD['geometry'].apply(lambda x: x.buffer(500))\n", - " inD = inD.to_crs({'init':'epsg:4326'})\n", - " \n", - " inD2 = inG2.loc[inG2['ISO3'] == iso3]\n", - " inD2 = inD2.to_crs({'init':'epsg:4326'}) \n", - " \n", - " ### Process 1km data\n", - " if km:\n", - " xx = helper.urban_country(iso3, output_folder, inD, pop_files,\n", - " final_folder=\"FINAL_STANDARD_1KM\", ghspop_suffix=\"1k\")\n", - " adm2_res = os.path.join(xx.final_folder, \"URBAN_ADMIN2_STATS_COMPILED.csv\") \n", - " ea_res = os.path.join(xx.final_folder, \"URBAN_COMMUNE_STATS_COMPILED.csv\")\n", - " tPrint(\"***** Extracting Global Layers %s\" % iso3)\n", - " xx.extract_layers(global_landcover, global_ghspop, global_ghspop_1k, global_ghbuilt, ghsl_vrt, ghs_smod)\n", - " tPrint(\"***** Downloading and processing elevation %s\" % iso3)\n", - " xx.process_dem(global_dem=global_dem_1k)\n", - " tPrint(\"***** Standardizing rasters\")\n", - " xx.standardize_rasters()\n", - " tPrint(\"***** Calculating Urban\")\n", - " xx.calculate_urban()\n", - " tPrint(\"***** Calculating Zonal admin2\")\n", - " if os.path.exists(ea_file):\n", - " if not os.path.exists(admin2_1k_stats):\n", - " zonal_adm2 = xx.pop_zonal_admin(inD2)\n", - " zonal_adm2.to_csv(admin2_1k_stats)\n", - " tPrint(\"***** Calculating Zonal communes\")\n", - " if not os.path.exists(commune_1k_stats):\n", - " inEA = gpd.read_file(ea_file)\n", - " zonal_ea = xx.pop_zonal_admin(inEA)\n", - " zonal_ea.to_csv(commune_1k_stats)\n", - " tPrint(\"***** Evaluating Data\")\n", - " xx.evaluateOutput(admin2_1k_stats, commune_1k_stats) \n", - " \n", - " ### Process 250m data \n", - " if small:\n", - " xx = helper.urban_country(iso3, output_folder, inD, pop_files)\n", - " tPrint(\"***** Extracting Global Layers %s\" % iso3)\n", - " xx.extract_layers(global_landcover, global_ghspop, global_ghspop_1k, global_ghbuilt, ghsl_vrt, ghs_smod)\n", - " tPrint(\"***** Downloading and processing elevation %s\" % iso3)\n", - " xx.process_dem(global_dem=global_dem_1k)\n", - " tPrint(\"***** Standardizing rasters\")\n", - " xx.standardize_rasters()\n", - " tPrint(\"***** Calculating Urban\")\n", - " xx.calculate_urban()\n", - " tPrint(\"***** Calculating Zonal admin2\")\n", - " if os.path.exists(ea_file):\n", - " if not os.path.exists(admin2_250_stats):\n", - " zonal_adm2 = xx.pop_zonal_admin(inD2)\n", - " zonal_adm2.to_csv(admin2_250_stats)\n", - " tPrint(\"***** Calculating Zonal communes\")\n", - " if not os.path.exists(commune_250_stats):\n", - " inEA = gpd.read_file(ea_file)\n", - " zonal_ea = xx.pop_zonal_admin(inEA)\n", - " zonal_ea.to_csv(commune_250_stats)\n", - " tPrint(\"***** Evaluating Data\")\n", - " xx.evaluateOutput(admin2_250_stats, commune_250_stats)\n", - " \n", - " \n", - "# Summarize Pierre's urbanization numbers\n", - "def calc_pp_urban(in_folder, default_pop_file, admin_layer):\n", - " urban_layers = [os.path.join(in_folder, x) for x in os.listdir(in_folder) if x[-4:] == \".tif\"]\n", - " cur_layer = urban_layers[0]\n", - " inD = gpd.read_file(admin_layer)\n", - " default_pop_1k = default_pop_file.replace(default_pop_file[:3], \"%s1k\" % default_pop_file[:3])\n", - " for cur_layer in urban_layers:\n", - " tPrint(cur_layer)\n", - " #Open and read in urban data\n", - " urban_r = rasterio.open(cur_layer)\n", - " urban_data = urban_r.read()\n", - " urban_data = (urban_data > 0).astype(urban_r.meta['dtype'])\n", - " #Extract population data\n", - " urban_layer = os.path.basename(cur_layer) \n", - " default_pop = default_pop_file\n", - " if \"1k\" in urban_layer:\n", - " default_pop = default_pop_1k\n", - " pop_layer = os.path.basename(cur_layer)[:11]\n", - " pop_folder = os.path.join(output_folder, \"FINAL_STANDARD_1KM\")\n", - " else:\n", - " pop_layer = os.path.basename(cur_layer)[:9]\n", - " pop_folder = os.path.join(output_folder, \"FINAL_STANDARD\")\n", - " pop_file = os.path.join(pop_folder,\"%s.tif\" % pop_layer)\n", - " if not os.path.exists(pop_file): \n", - " pop_file = os.path.join(pop_folder, default_pop)\n", - " \n", - " pop_data = rasterio.open(pop_file).read()\n", - " pop_data = pop_data * urban_data\n", - " meta = urban_r.meta.copy()\n", - " meta.update(dtype = pop_data.dtype)\n", - " \n", - " with rMisc.create_rasterio_inmemory(meta, pop_data) as pop_r:\n", - " res = rMisc.zonalStats(inD, pop_r, reProj=True)\n", - " res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN'])\n", - " \n", - " inD[os.path.basename(cur_layer).replace(\".tif\",\"\")] = res['SUM']\n", - " return(inD)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/geopandas/geodataframe.py:853: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " super(GeoDataFrame, self).__setitem__(key, value)\n", - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=:' syntax is deprecated. ':' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n", - " return _prepare_from_string(\" \".join(pjargs))\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19:44:01\t***** Extracting Global Layers GHA\n", - "19:44:01\t***** Downloading and processing elevation GHA\n", - "19:44:01\t***** Standardizing rasters\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_adm.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_gpo.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_wat_lc.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_wat.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_wat.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_slo.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_ele.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_gsmod.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_gbu.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_upo15.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_upo17.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_cpo15.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_cpo17.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha1k_gpo.tif\n", - "19:44:01\t***** Calculating Urban\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/gha1k_upo15.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/gha1k_upo17.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/gha1k_cpo15.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/gha1k_cpo17.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/gha1k_gpo.tif\n", - "19:44:01\t***** Calculating Zonal admin2\n", - "19:44:01\t***** Calculating Zonal communes\n", - "19:44:01\t***** Evaluating Data\n", - "19:44:05\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", - "19:44:05\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/URBAN_COMMUNE_STATS_COMPILED_1k.csv\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3296: DtypeWarning: Columns (19,24,29) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " exec(code_obj, self.user_global_ns, self.user_ns)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19:44:05\t***** Extracting Global Layers GHA\n", - "19:44:05\t***** Downloading and processing elevation GHA\n", - "19:44:05\t***** Standardizing rasters\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_adm.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_gpo.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_wat_lc.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_wat.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_wat.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_slo.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_ele.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_gsmod.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_gbu.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_upo15.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_upo17.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_cpo15.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_cpo17.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/gha_gpo.tif\n", - "19:44:06\t***** Calculating Urban\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/FINAL_STANDARD/gha_upo15.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/FINAL_STANDARD/gha_upo17.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/FINAL_STANDARD/gha_cpo15.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/FINAL_STANDARD/gha_cpo17.tif\n", - "/home/wb411133/temp/GHA_URBAN_DATA_new_naming/FINAL_STANDARD/gha_gpo.tif\n", - "19:44:06\t***** Calculating Zonal admin2\n", - "19:44:06\t***** Calculating Zonal communes\n", - "19:44:06\t***** Evaluating Data\n", - "19:44:11\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/URBAN_ADMIN2_STATS_COMPILED.csv\n", - "19:44:11\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/URBAN_COMMUNE_STATS_COMPILED.csv\n", - "19:44:18\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/ghana/gha_cpo15d10b2000_cc.tif\n", - "19:45:12\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/ghana/gha_cpo17d10b2000_cc.tif\n", - "19:46:07\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/ghana/gha_dbud10b2000_cc.tif\n", - "19:47:02\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/ghana/gha_gbud10b2000_cc.tif\n", - "19:47:58\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/ghana/gha_gpod10b2000_cc.tif\n", - "19:48:53\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/ghana/gha_upo15d10b2000_cc.tif\n", - "19:49:46\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/ghana/gha_upo17d10b2000_cc.tif\n", - "19:50:41\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/ghana/gha1k_cpo15d2b2000_cc.tif\n", - "19:51:34\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/ghana/gha1k_cpo17d2b2000_cc.tif\n", - "19:52:27\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/ghana/gha1k_dbud2b2000_cc.tif\n", - "19:53:20\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/ghana/gha1k_gbud2b2000_cc.tif\n", - "19:54:14\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/ghana/gha1k_gpod2b2000_cc.tif\n", - "19:55:07\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/ghana/gha1k_upo15d2b2000_cc.tif\n", - "19:56:00\t/home/wb411133/temp/GHA_URBAN_DATA_new_naming/ghana/gha1k_upo17d2b2000_cc.tif\n" - ] - } - ], - "source": [ - "importlib.reload(helper)\n", - "# Process GHA\n", - "iso3 = \"GHA\"\n", - "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", - "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", - "pop_2018_un = os.path.join(local_path, \"%s_ppp_2017_UNadj.tif\" % iso3.lower())\n", - "pop_2015_con = os.path.join(local_path, \"ppp_prj_2015_%s_UNadj.tif\" % iso3)\n", - "pop_2018_con = os.path.join(local_path, \"ppp_prj_2017_%s_UNadj.tif\" % iso3)\n", - "\n", - "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", - " [pop_2018_un, \"%s_upo17.tif\" % iso3.lower()], \n", - " [pop_2015_con, \"%s_cpo15.tif\" % iso3.lower()], \n", - " [pop_2018_con, \"%s_cpo17.tif\" % iso3.lower()]]\n", - "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - "ea_file = os.path.join(output_folder, 'FINAL_EAS.shp')\n", - "\n", - "calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)\n", - "pp_urban = calc_pp_urban(os.path.join(output_folder, \"ghana\"), \"%s_cpo17.tif\" % iso3.lower(), ea_file)\n", - "pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(ea_file.replace(\".shp\", \"_PP_Urban.csv\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/geopandas/geodataframe.py:853: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " super(GeoDataFrame, self).__setitem__(key, value)\n", - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=:' syntax is deprecated. ':' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n", - " return _prepare_from_string(\" \".join(pjargs))\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19:56:54\t***** Extracting Global Layers ETH\n", - "19:56:54\t***** Downloading and processing elevation ETH\n", - "19:56:54\t***** Standardizing rasters\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_adm.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_gpo.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_wat_lc.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_wat.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_wat.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_slo.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_ele.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_gsmod.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_gbu.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_upo15.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_upo16.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth1k_gpo.tif\n", - "19:56:55\t***** Calculating Urban\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/eth1k_upo15.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/eth1k_upo16.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/eth1k_gpo.tif\n", - "19:56:55\t***** Calculating Zonal admin2\n", - "19:56:55\t***** Calculating Zonal communes\n", - "19:56:55\t***** Evaluating Data\n", - "19:57:07\t/home/wb411133/temp/ETH_URBAN_DATA_new_naming/URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", - "19:57:07\t/home/wb411133/temp/ETH_URBAN_DATA_new_naming/URBAN_COMMUNE_STATS_COMPILED_1k.csv\n", - "19:57:08\t***** Extracting Global Layers ETH\n", - "19:57:08\t***** Downloading and processing elevation ETH\n", - "19:57:08\t***** Standardizing rasters\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_adm.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_gpo.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_wat_lc.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_wat.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_wat.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_slo.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_ele.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_gsmod.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_gbu.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_upo15.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_upo16.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/eth_gpo.tif\n", - "19:57:09\t***** Calculating Urban\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/FINAL_STANDARD/eth_upo15.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/FINAL_STANDARD/eth_upo16.tif\n", - "/home/wb411133/temp/ETH_URBAN_DATA_new_naming/FINAL_STANDARD/eth_gpo.tif\n", - "19:57:09\t***** Calculating Zonal admin2\n", - "19:57:09\t***** Calculating Zonal communes\n", - "19:57:09\t***** Evaluating Data\n", - "19:57:32\t/home/wb411133/temp/ETH_URBAN_DATA_new_naming/URBAN_ADMIN2_STATS_COMPILED.csv\n", - "19:57:32\t/home/wb411133/temp/ETH_URBAN_DATA_new_naming/URBAN_COMMUNE_STATS_COMPILED.csv\n" - ] - } - ], - "source": [ - "# Process ETH\n", - "iso3 = \"ETH\"\n", - "local_path = \"/home/public/Data/COUNTRY/{country}/WORLDPOP/\".format(country=iso3)\n", - "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", - "pop_2018_un = os.path.join(local_path, \"%s_ppp_2016_UNadj.tif\" % iso3.lower())\n", - "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", - " [pop_2018_un, \"%s_upo16.tif\" % iso3.lower()]]\n", - "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - "ea_file = os.path.join(output_folder, \"Kebeles\", \"all_kebeles.shp\")\n", - "\n", - "calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/geopandas/geodataframe.py:853: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " super(GeoDataFrame, self).__setitem__(key, value)\n", - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=:' syntax is deprecated. ':' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n", - " return _prepare_from_string(\" \".join(pjargs))\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19:57:33\t***** Extracting Global Layers COL\n", - "19:57:33\t***** Downloading and processing elevation COL\n", - "19:57:33\t***** Standardizing rasters\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_adm.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_gpo.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_wat_lc.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_wat.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_wat.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_slo.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_ele.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_gsmod.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_gbu.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_upo15.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col1k_gpo.tif\n", - "19:57:33\t***** Calculating Urban\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/col1k_upo15.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/col1k_gpo.tif\n", - "19:57:33\t***** Calculating Zonal admin2\n", - "19:57:33\t***** Evaluating Data\n", - "19:57:43\t***** Extracting Global Layers COL\n", - "19:57:43\t***** Downloading and processing elevation COL\n", - "19:57:43\t***** Standardizing rasters\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_adm.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_gpo.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_wat_lc.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_wat.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_wat.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_slo.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_ele.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_gsmod.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_gbu.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_upo15.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/col_gpo.tif\n", - "19:57:46\t***** Calculating Urban\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/FINAL_STANDARD/col_upo15.tif\n", - "/home/wb411133/temp/COL_URBAN_DATA_new_naming/FINAL_STANDARD/col_gpo.tif\n", - "19:57:46\t***** Calculating Zonal admin2\n", - "19:57:46\t***** Evaluating Data\n" - ] - } - ], - "source": [ - "importlib.reload(helper)\n", - "# Process COL\n", - "iso3 = \"COL\"\n", - "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", - "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", - "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()]]\n", - "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - "ea_file = ''\n", - "\n", - "calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/geopandas/geodataframe.py:853: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " super(GeoDataFrame, self).__setitem__(key, value)\n", - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=:' syntax is deprecated. ':' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n", - " return _prepare_from_string(\" \".join(pjargs))\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19:58:10\t***** Extracting Global Layers EGY\n", - "19:58:10\t***** Downloading and processing elevation EGY\n", - "19:58:10\t***** Standardizing rasters\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_adm.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_gpo.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_wat_lc.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_wat.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_wat.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_slo.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_ele.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_gsmod.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_gbu.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_upo15.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_upo16.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy1k_gpo.tif\n", - "19:58:10\t***** Calculating Urban\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/egy1k_upo15.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/egy1k_upo16.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/egy1k_gpo.tif\n", - "19:58:10\t***** Calculating Zonal admin2\n", - "19:58:10\t***** Calculating Zonal communes\n", - "19:58:10\t***** Evaluating Data\n", - "19:58:17\t/home/wb411133/temp/EGY_URBAN_DATA_new_naming/URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", - "19:58:17\t/home/wb411133/temp/EGY_URBAN_DATA_new_naming/URBAN_COMMUNE_STATS_COMPILED_1k.csv\n", - "19:58:17\t***** Extracting Global Layers EGY\n", - "19:58:17\t***** Downloading and processing elevation EGY\n", - "19:58:17\t***** Standardizing rasters\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_adm.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_gpo.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_wat_lc.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_wat.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_wat.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_slo.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_ele.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_gsmod.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_gbu.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_upo15.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_upo16.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/egy_gpo.tif\n", - "19:58:18\t***** Calculating Urban\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/FINAL_STANDARD/egy_upo15.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/FINAL_STANDARD/egy_upo16.tif\n", - "/home/wb411133/temp/EGY_URBAN_DATA_new_naming/FINAL_STANDARD/egy_gpo.tif\n", - "19:58:18\t***** Calculating Zonal admin2\n", - "19:58:18\t***** Calculating Zonal communes\n", - "19:58:18\t***** Evaluating Data\n", - "19:58:32\t/home/wb411133/temp/EGY_URBAN_DATA_new_naming/URBAN_ADMIN2_STATS_COMPILED.csv\n", - "19:58:32\t/home/wb411133/temp/EGY_URBAN_DATA_new_naming/URBAN_COMMUNE_STATS_COMPILED.csv\n" - ] - } - ], - "source": [ - "importlib.reload(helper)\n", - "# Process EGY\n", - "iso3 = \"EGY\"\n", - "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", - "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", - "pop_2018_un = os.path.join(local_path, \"%s_ppp_2013_UNadj.tif\" % iso3.lower())\n", - "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", - " [pop_2018_un, \"%s_upo16.tif\" % iso3.lower()]]\n", - "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - "\n", - "ea_file = os.path.join(output_folder, \"EGY_adm3.shp\")\n", - "\n", - "calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/geopandas/geodataframe.py:853: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " super(GeoDataFrame, self).__setitem__(key, value)\n", - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=:' syntax is deprecated. ':' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n", - " return _prepare_from_string(\" \".join(pjargs))\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19:58:33\t***** Extracting Global Layers AGO\n", - "19:58:33\t***** Downloading and processing elevation AGO\n", - "19:58:33\t***** Standardizing rasters\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_adm.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_gpo.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_wat_lc.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_wat.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_wat.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_slo.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_ele.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_gsmod.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_gbu.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_upo15.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_upo18.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago1k_gpo.tif\n", - "19:58:33\t***** Calculating Urban\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/ago1k_upo15.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/ago1k_upo18.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/ago1k_gpo.tif\n", - "19:58:33\t***** Calculating Zonal admin2\n", - "19:58:33\t***** Calculating Zonal communes\n", - "19:58:33\t***** Evaluating Data\n", - "19:58:47\t/home/wb411133/temp/AGO_URBAN_DATA_new_naming/URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", - "19:58:47\t/home/wb411133/temp/AGO_URBAN_DATA_new_naming/URBAN_COMMUNE_STATS_COMPILED_1k.csv\n", - "19:58:47\t***** Extracting Global Layers AGO\n", - "19:58:47\t***** Downloading and processing elevation AGO\n", - "19:58:47\t***** Standardizing rasters\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_adm.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_gpo.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_wat_lc.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_wat.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_wat.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_slo.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_ele.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_gsmod.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_gbu.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_upo15.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_upo18.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/ago_gpo.tif\n", - "19:58:49\t***** Calculating Urban\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/FINAL_STANDARD/ago_upo15.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/FINAL_STANDARD/ago_upo18.tif\n", - "/home/wb411133/temp/AGO_URBAN_DATA_new_naming/FINAL_STANDARD/ago_gpo.tif\n", - "19:58:49\t***** Calculating Zonal admin2\n", - "19:58:49\t***** Calculating Zonal communes\n", - "19:58:49\t***** Evaluating Data\n", - "19:59:11\t/home/wb411133/temp/AGO_URBAN_DATA_new_naming/URBAN_ADMIN2_STATS_COMPILED.csv\n", - "19:59:11\t/home/wb411133/temp/AGO_URBAN_DATA_new_naming/URBAN_COMMUNE_STATS_COMPILED.csv\n" - ] - } - ], - "source": [ - "importlib.reload(helper)\n", - "# Process AGO\n", - "iso3 = \"AGO\"\n", - "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", - "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", - "pop_2018_un = os.path.join(local_path, \"%s_ppp_2018_UNadj.tif\" % iso3.lower())\n", - "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", - " [pop_2018_un, \"%s_upo18.tif\" % iso3.lower()]]\n", - "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - "ea_file = os.path.join(output_folder, 'admin', 'bairros.shp')\n", - "\n", - "calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/geopandas/geodataframe.py:853: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " super(GeoDataFrame, self).__setitem__(key, value)\n", - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=:' syntax is deprecated. ':' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n", - " return _prepare_from_string(\" \".join(pjargs))\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19:59:13\t***** Extracting Global Layers BGD\n", - "19:59:13\t***** Downloading and processing elevation BGD\n", - "19:59:13\t***** Standardizing rasters\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_adm.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_gpo.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_wat_lc.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_wat.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_wat.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_slo.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_ele.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_gsmod.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_gbu.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_upo15.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_upo18.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd1k_gpo.tif\n", - "19:59:13\t***** Calculating Urban\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/bgd1k_upo15.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/bgd1k_upo18.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/bgd1k_gpo.tif\n", - "19:59:13\t***** Calculating Zonal admin2\n", - "19:59:13\t***** Calculating Zonal communes\n", - "19:59:13\t***** Evaluating Data\n", - "19:59:15\t/home/wb411133/temp/BGD_URBAN_DATA_new_naming/URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", - "19:59:15\t/home/wb411133/temp/BGD_URBAN_DATA_new_naming/URBAN_COMMUNE_STATS_COMPILED_1k.csv\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3296: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " exec(code_obj, self.user_global_ns, self.user_ns)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19:59:15\t***** Extracting Global Layers BGD\n", - "19:59:15\t***** Downloading and processing elevation BGD\n", - "19:59:15\t***** Standardizing rasters\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_adm.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_gpo.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_wat_lc.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_wat.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_wat.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_slo.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_ele.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_gsmod.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_gbu.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_upo15.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_upo18.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/bgd_gpo.tif\n", - "19:59:16\t***** Calculating Urban\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD/bgd_upo15.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD/bgd_upo18.tif\n", - "/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD/bgd_gpo.tif\n", - "19:59:16\t***** Calculating Zonal admin2\n", - "19:59:16\t***** Calculating Zonal communes\n", - "19:59:16\t***** Evaluating Data\n", - "19:59:19\t/home/wb411133/temp/BGD_URBAN_DATA_new_naming/URBAN_ADMIN2_STATS_COMPILED.csv\n", - "19:59:19\t/home/wb411133/temp/BGD_URBAN_DATA_new_naming/URBAN_COMMUNE_STATS_COMPILED.csv\n" - ] - } - ], - "source": [ - "importlib.reload(helper)\n", - "# Process BGD\n", - "iso3 = \"BGD\"\n", - "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", - "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", - "pop_2018_un = os.path.join(local_path, \"%s_ppp_2018_UNadj.tif\" % iso3.lower())\n", - "\n", - "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", - " [pop_2018_un, \"%s_upo18.tif\" % iso3.lower()]]\n", - "\n", - "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - "ea_file = os.path.join(output_folder, 'mauza11_reprojected.shp')\n", - "calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)\n", - "#pp_urban = calc_pp_urban(os.path.join(output_folder, \"bangladesh\"), \"%s_upo18.tif\" % iso3.lower(), ea_file)\n", - "#pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(ea_file.replace(\".shp\", \"_PP_Urban.csv\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/geopandas/geodataframe.py:853: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " super(GeoDataFrame, self).__setitem__(key, value)\n", - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=:' syntax is deprecated. ':' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n", - " return _prepare_from_string(\" \".join(pjargs))\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19:59:20\t***** Extracting Global Layers VNM\n", - "19:59:20\t***** Downloading and processing elevation VNM\n", - "19:59:20\t***** Standardizing rasters\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_adm.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_gpo.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_wat_lc.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_wat.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_wat.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_slo.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_ele.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_gsmod.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_gbu.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_upo15.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_upo18.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm1k_gpo.tif\n", - "19:59:20\t***** Calculating Urban\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/vnm1k_upo15.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/vnm1k_upo18.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/vnm1k_gpo.tif\n", - "19:59:20\t***** Calculating Zonal admin2\n", - "19:59:20\t***** Calculating Zonal communes\n", - "19:59:20\t***** Evaluating Data\n", - "19:59:26\t/home/wb411133/temp/VNM_URBAN_DATA_new_naming/URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", - "19:59:26\t/home/wb411133/temp/VNM_URBAN_DATA_new_naming/URBAN_COMMUNE_STATS_COMPILED_1k.csv\n", - "19:59:26\t***** Extracting Global Layers VNM\n", - "19:59:26\t***** Downloading and processing elevation VNM\n", - "19:59:26\t***** Standardizing rasters\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_adm.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_gpo.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_wat_lc.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_wat.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_wat.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_slo.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_ele.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_gsmod.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_gbu.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_upo15.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_upo18.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm_gpo.tif\n", - "19:59:28\t***** Calculating Urban\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/FINAL_STANDARD/vnm_upo15.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/FINAL_STANDARD/vnm_upo18.tif\n", - "/home/wb411133/temp/VNM_URBAN_DATA_new_naming/FINAL_STANDARD/vnm_gpo.tif\n", - "06:28:54\t***** Calculating Zonal admin2\n", - "06:28:54\t***** Calculating Zonal communes\n", - "06:28:54\t***** Evaluating Data\n", - "06:29:09\t/home/wb411133/temp/VNM_URBAN_DATA_new_naming/URBAN_ADMIN2_STATS_COMPILED.csv\n", - "06:29:09\t/home/wb411133/temp/VNM_URBAN_DATA_new_naming/URBAN_COMMUNE_STATS_COMPILED.csv\n" - ] - } - ], - "source": [ - "importlib.reload(helper)\n", - "# Process VNM\n", - "iso3 = \"VNM\"\n", - "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", - "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", - "pop_2018_un = os.path.join(local_path, \"%s_ppp_2018_UNadj.tif\" % iso3.lower())\n", - "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", - " [pop_2018_un, \"%s_upo18.tif\" % iso3.lower()]]\n", - "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - "ea_file = os.path.join(output_folder, 'VN_communes2008.shp')\n", - "\n", - "calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)\n", - "#pp_urban = calc_pp_urban(os.path.join(output_folder, \"vietnam\"), \"%s_upo18.tif\" % iso3.lower(), ea_file)\n", - "#pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(ea_file.replace(\".shp\", \"_PP_Urban.csv\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/geopandas/geodataframe.py:853: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " super(GeoDataFrame, self).__setitem__(key, value)\n", - "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=:' syntax is deprecated. ':' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n", - " return _prepare_from_string(\" \".join(pjargs))\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "06:29:09\t***** Extracting Global Layers TZA\n", - "06:29:09\t***** Downloading and processing elevation TZA\n", - "06:29:09\t***** Standardizing rasters\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_adm.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_gpo.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_wat_lc.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_wat.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_wat.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_slo.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_ele.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_gsmod.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_gbu.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_upo15.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_upo18.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_cpo15.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_cpo18.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza1k_gpo.tif\n", - "06:32:41\t***** Calculating Urban\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/tza1k_upo15.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/tza1k_upo18.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/tza1k_cpo15.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/tza1k_cpo18.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/tza1k_gpo.tif\n", - "06:37:20\t***** Calculating Zonal admin2\n", - "06:37:20\t***** Evaluating Data\n", - "06:37:33\t***** Extracting Global Layers TZA\n", - "06:37:33\t***** Downloading and processing elevation TZA\n", - "06:37:33\t***** Standardizing rasters\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_adm.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_gpo.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_wat_lc.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_wat.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_wat.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_slo.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_ele.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_gsmod.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_gbu.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_upo15.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_upo18.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_cpo15.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_cpo18.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/tza_gpo.tif\n", - "06:40:16\t***** Calculating Urban\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_upo15.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_upo18.tif\n", - "/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_cpo15.tif\n" - ] - } - ], - "source": [ - "importlib.reload(helper)\n", - "# Process TZA\n", - "iso3 = \"TZA\"\n", - "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/TZA_2015_2018\".format(country=iso3)\n", - "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", - "pop_2018_un = os.path.join(local_path, \"%s_ppp_2018_UNadj.tif\" % iso3.lower())\n", - "pop_2015_con = os.path.join(local_path, \"ppp_prj_2015_%s_UNadj.tif\" % iso3)\n", - "pop_2018_con = os.path.join(local_path, \"ppp_prj_2018_%s_UNadj.tif\" % iso3)\n", - "\n", - "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", - " [pop_2018_un, \"%s_upo18.tif\" % iso3.lower()], \n", - " [pop_2015_con, \"%s_cpo15.tif\" % iso3.lower()], \n", - " [pop_2018_con, \"%s_cpo18.tif\" % iso3.lower()]]\n", - "\n", - "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - "ea_file = ''\n", - "\n", - "calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Process point location analysis\n", - "input_file = os.path.join(output_folder, \"sample_imp.csv\")\n", - "inD = pd.read_csv(input_file)\n", - "geoms = [Point(row['gps_imp_lo'], row['gps_imp_la']) for idx, row in inD.iterrows()]\n", - "inD = gpd.GeoDataFrame(inD, geometry=geoms, crs={'init':'epsg:4326'})\n", - "if inD.crs != urban_r.crs:\n", - " inD = inD.to_crs(urban_r.crs)\n", - "geoms = [(row['geometry'].x, row['geometry'].y) for idx, row in inD.iterrows()]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Summarize Pierre's urbanization numbers\n", - "in_folder = os.path.join(output_folder, \"tanzania\")\n", - "urban_layers = [os.path.join(in_folder, x) for x in os.listdir(in_folder)]\n", - "\n", - "cur_layer = urban_layers[0]\n", - "for cur_layer in urban_layers:\n", - " tPrint(cur_layer)\n", - " urban_r = rasterio.open(cur_layer)\n", - " urban_data = urban_r.read()\n", - " urban_data = (urban_data > 0).astype(urban_r.meta['dtype'])\n", - " '''if \"1k\" in cur_layer:\n", - " pop_layer = os.path.basename(cur_layer)[:11]\n", - " pop_folder = os.path.join(output_folder, \"FINAL_STANDARD_1KM\")\n", - " else:\n", - " pop_layer = os.path.basename(cur_layer)[:9]\n", - " pop_folder = os.path.join(output_folder, \"FINAL_STANDARD\")\n", - " pop_file = os.path.join(pop_folder,\"%s.tif\" % pop_layer)\n", - "\n", - " pop_data = rasterio.open(pop_file).read()\n", - " '''\n", - " #pop_data = pop_data * urban_data\n", - " with rMisc.create_rasterio_inmemory(urban_r.profile, urban_data) as pop_r:\n", - " res = pop_r.sample(geoms)\n", - " res = [x[0] for x in list(res)]\n", - " inD[os.path.basename(cur_layer).replace(\".tif\",\"\")] = res" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pd.DataFrame(inD.drop(['geometry'], axis=1)).to_csv(input_file.replace(\".csv\", \"_urban_PP.csv\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pop_tiffs = ['/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_upo15.tif',\n", - "'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_upo18.tif',\n", - "'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_cpo15.tif',\n", - "'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_cpo18.tif',\n", - "'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_gpo.tif']\n", - "\n", - "for pFile in pop_tiffs:\n", - " urb_file = pFile.replace(\".tif\", \"_urban.tif\")\n", - " hd_file = pFile.replace(\".tif\", \"_urban_hd.tif\")\n", - " for curFile in [urb_file, hd_file]:\n", - " inUrb = rasterio.open(curFile)\n", - " if inD.crs!= inUrb.crs:\n", - " inD = inD.to_crs(inUrb.crs)\n", - " geoms = [(row['geometry'].x, row['geometry'].y) for idx, row in inD.iterrows()]\n", - " urb_res = inUrb.sample(geoms)\n", - " inD[os.path.basename(curFile).replace(\".tif\",\"\")] = [x[0] for x in list(urb_res)]\n", - "pd.DataFrame(inD.drop(['geometry'], axis=1)).to_csv(input_file.replace(\".csv\", \"_urban.csv\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate national summaries of urbanization\n", - "pop_tiffs = ['/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_upo15.tif',\n", - "'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_upo18.tif',\n", - "'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_cpo15.tif',\n", - "'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_cpo18.tif',\n", - "'/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/tza_gpo.tif']\n", - "\n", - "for pFile in pop_tiffs:\n", - " urb_file = pFile.replace(\".tif\", \"_urban.tif\")\n", - " hd_file = pFile.replace(\".tif\", \"_urban_hd.tif\")\n", - " popR = rasterio.open(pFile)\n", - " popD = popR.read()\n", - " popD[popD < 0] = 0\n", - " \n", - " urbR = rasterio.open(urb_file)\n", - " urbD = urbR.read()\n", - "\n", - " hdR = rasterio.open(hd_file)\n", - " hdD = hdR.read()\n", - " \n", - " print(f\"{os.path.basename(pFile)}: {round(popD.sum())}, {round((popD * urbD).sum())}, \\\n", - " hd pop: {round((popD * hdD).sum())}\")\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Debugging" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# combine Ethiopian Kebeles\n", - "folder = \"/home/wb411133/temp/ETH_URBAN_DATA_new_naming/Kebeles\"\n", - "files = [x for x in os.listdir(folder) if x[-4:] == \".shp\"]\n", - "for f in files:\n", - " xx = gpd.read_file(os.path.join(folder, f))\n", - " try:\n", - " final = final.append(xx)\n", - " except:\n", - " final = xx" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "final.reset_index(inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "final.to_file(os.path.join(folder, \"all_kebeles.shp\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "iso3 = 'tza'\n", - "folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3.upper()\n", - "\n", - "pop_250_base = os.path.join(folder, \"%s_gpo.tif\" % iso3)\n", - "pop_1k_base = os.path.join(folder, \"%s1k_gpo.tif\" % iso3)\n", - "\n", - "pop_250_scaled = os.path.join(folder, 'FINAL_STANDARD', \"%s_gpo.tif\" % iso3)\n", - "pop_1k_scaled = os.path.join(folder, 'FINAL_STANDARD_1KM', \"%s1k_gpo.tif\" % iso3)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for rFile in [pop_250_base, pop_250_scaled, pop_1k_base, pop_1k_scaled]:\n", - " inR = rasterio.open(rFile)\n", - " inD = inR.read()\n", - " inD[inD < 0] = 0\n", - " tPrint(f\"{round(inD.sum())}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "final_pop = pop_1k_base\n", - "final_urban = final_pop.replace(\".tif\", \"_urban.tif\")\n", - "final_urban_hd = final_pop.replace(\".tif\", \"_urban_hd.tif\")\n", - "urbanR = urban.urbanGriddedPop(final_pop)\n", - "in_raster = rasterio.open(final_pop)\n", - "total_ratio = (in_raster.res[0] * in_raster.res[1]) / 1000000\n", - "total_ratio " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# calculate urban from 1k population layer with various warping methods\n", - "urban_shp = urbanR.calculateUrban(densVal = 300, totalPopThresh=5000, raster=final_urban)\n", - "cluster_shp = urbanR.calculateUrban(densVal = 1500, totalPopThresh=50000, raster=final_urban_hd, smooth=True, queen=True)\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Summarize population\n", - "def summarize_pop(final_pop):\n", - " final_pop = pop_1k_base\n", - " final_urban = final_pop.replace(\".tif\", \"_urban.tif\")\n", - " final_urban_hd = final_pop.replace(\".tif\", \"_urban_hd.tif\")\n", - "\n", - " urbanR = rasterio.open(final_urban)\n", - " urbanHDR = rasterio.open(final_urban_hd)\n", - " urbanD = urbanR.read()\n", - " hdD = urbanHDR.read()\n", - " popD = in_raster.read()\n", - " popD[popD == in_raster.meta['nodata']] = 0\n", - "\n", - " totalPop = popD.sum()\n", - " urbanPop = (popD * urbanD).sum()\n", - " hdPop = (popD * hdD).sum()\n", - " print(f'Per urban: {(urbanPop/totalPop*100)}')\n", - " print(f'Per HD urban: {(hdPop/totalPop*100)}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "summarize_pop(final_pop)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "summarize_pop(\"/home/wb411133/temp/TZA_URBAN_DATA_new_naming/FINAL_STANDARD_1K/tza1k_gpo.tif\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "global_ghsl = \"/home/public/Data/GLOBAL/GHSL/ghsl.vrt\"\n", - "pop1k = '/home/wb411133/temp/VNM_URBAN_DATA_new_naming/vnm1k_gpo.tif'\n", - "iso3 = 'VNM'\n", - "in_folder = '/home/wb411133/temp/TESTING_WATER_GHSL'\n", - "inD = gpd.read_file(os.path.join(in_folder, 'aoi.geojson'))\n", - "out_raw = os.path.join(in_folder, \"ghsl_water.tif\")\n", - "out_pop = os.path.join(in_folder, \"ghs_pop_1k.tif\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Why aren't water masks extracting\n", - "inR = rasterio.open(pop1k)\n", - "if inD.crs != inR.crs:\n", - " inD = inD.to_crs(inR.crs)\n", - "ul = inR.index(*inD.total_bounds[0:2])\n", - "lr = inR.index(*inD.total_bounds[2:4])\n", - "# read the subset of the data into a numpy array\n", - "window = ((float(lr[0]), float(ul[0]+1)), (float(ul[1]), float(lr[1]+1)))\n", - "data = inR.read(1, window=window, masked = False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "b = inD.total_bounds\n", - "new_transform = rasterio.transform.from_bounds(b[0], b[1], b[2], b[3], data.shape[1], data.shape[0])\n", - "meta = inR.meta.copy()\n", - "meta.update(driver='GTiff',width=data.shape[1], height=data.shape[0], transform=new_transform)\n", - "data = data.astype(meta['dtype'])\n", - "with rasterio.open(out_pop, 'w', **meta) as outR:\n", - " outR.write_band(1, data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#test some resampling\n", - "template_r = out_pop\n", - "in_raster = out_raw\n", - "\n", - "tempR = rasterio.open(template_r)\n", - "inR = rasterio.open(in_raster)\n", - "inD = inR.read()[0,:,:]\n", - "\n", - "for rDef in [\n", - " [rasterio.warp.Resampling.nearest, \"nearest\"],\n", - " [rasterio.warp.Resampling.bilinear, \"bil\"],\n", - " [rasterio.warp.Resampling.max, \"max\"],\n", - " [rasterio.warp.Resampling.cubic, \"cubic\"]\n", - " ]:\n", - " out_array = np.zeros(tempR.shape)\n", - " rasterio.warp.reproject(inD, out_array, \n", - " src_transform=inR.meta['transform'], dst_transform=tempR.meta['transform'],\n", - " src_crs = inR.crs, dst_crs = tempR.crs,\n", - " src_nodata = inR.meta['nodata'], dst_nodata = tempR.meta['nodata'],\n", - " resampling = rDef[0])\n", - " print(rDef[0])\n", - " print(out_array.sum())\n", - " out_file = os.path.join(in_folder, \"ghsl_wat_%s.tif\" % rDef[1])\n", - " meta = tempR.meta.copy()\n", - " meta.update(dtype=out_array.dtype)\n", - " with rasterio.open(out_file, 'w', **meta) as outR:\n", - " outR.write_band(1, out_array)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os, shutil" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Delete existing files\n", - "in_folder = \"/home/wb411133/temp\"\n", - "for root, dirs, files in os.walk(in_folder):\n", - " for d in dirs:\n", - " if (d == \"FINAL_STANDARD\") or (d == \"FINAL_STANDARD_1KM\"):\n", - " cur_dir = os.path.join(root, d)\n", - " if os.path.isdir(cur_dir):\n", - " shutil.rmtree(cur_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python (geog)", - "language": "python", - "name": "geog" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.1" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Notebooks/Implementations/README.md b/Notebooks/Implementations/URB_SEAU1_NovelUrbanization/README.md similarity index 100% rename from Notebooks/Implementations/README.md rename to Notebooks/Implementations/URB_SEAU1_NovelUrbanization/README.md diff --git a/Notebooks/Implementations/URB_SEAU1_NovelUrbanization/URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb b/Notebooks/Implementations/URB_SEAU1_NovelUrbanization/URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb new file mode 100755 index 0000000..ee2d2af --- /dev/null +++ b/Notebooks/Implementations/URB_SEAU1_NovelUrbanization/URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb @@ -0,0 +1,1369 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Extract data for urban calculations\n", + "\n", + "Test input for Tanzania\n", + "\n", + "0. Select focal ADM, buffer by 1km, rasterize as [0, 1]\n", + "1. Download DEM data from ASTER, mosaick\n", + "2. Calculate slope of DEM\n", + "3. Extract water layer from Globcover\n", + "4. Rasterize building footprints\n", + "5. Select population layer\n", + "6. Standardize all rasters to population layer \n", + " a. Set area outside of focal admin to NoData \n", + " b. Set everything to 16bit \n", + " \n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sys, os, importlib, shutil, pathlib, datetime\n", + "import requests\n", + "import rasterio, elevation, richdem\n", + "import rasterio.warp\n", + "from rasterio import features\n", + "\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "\n", + "from shapely.geometry import MultiPolygon, Polygon, box, Point\n", + "\n", + "#Import raster helpers\n", + "sys.path.append(\"../../../gostrocks/src\")\n", + "\n", + "import GOSTRocks.rasterMisc as rMisc\n", + "from GOSTRocks.misc import tPrint\n", + "\n", + "#Import GOST urban functions\n", + "sys.path.append(\"../../src\")\n", + "import GOST_Urban.UrbanRaster as urban\n", + "import GOST_Urban.urban_helper as helper\n", + "\n", + "#Import local functions\n", + "import novelUrbanization as nu\n", + "from novelUrbanization import *\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "global_bounds = \"/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp\"\n", + "global_bounds_adm2 = \"/home/public/Data/GLOBAL/ADMIN/Admin2_Polys.shp\"\n", + "\n", + "inG = gpd.read_file(global_bounds)\n", + "inG2 = gpd.read_file(global_bounds_adm2)\n", + "\n", + "runSmall = True\n", + "runLarge = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Convert EA csv files to geometry" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "in_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/\"\n", + "ea_files = []\n", + "for root, dirs, files in os.walk(in_folder):\n", + " for x in files:\n", + " if ((x.endswith(\".csv\")) and (not \"URBAN\" in x)):\n", + " ea_files.append(os.path.join(root, x))\n", + " \n", + "ea_files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pd.read_csv(ea_files[-1]).head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "def try_float(x):\n", + " try:\n", + " return(float(x))\n", + " except:\n", + " return(None)\n", + "\n", + "def read_geog(file, lat_column, lon_column, crs='epsg:4326', write_out=True):\n", + " print(os.path.basename(file))\n", + " out_file = file.replace(\".csv\", \".geojson\")\n", + " inD = pd.read_csv(file)\n", + " \n", + " print(inD.shape)\n", + " inD[lat_column] = inD[lat_column].apply(try_float)\n", + " inD[lon_column] = inD[lon_column].apply(try_float) \n", + " inD = inD.loc[~(inD[lat_column].isna() | inD[lon_column].isna())]\n", + " print(inD.shape)\n", + " \n", + " inD_geom = inD.apply(lambda x: Point(float(x[lon_column]), float(x[lat_column])), axis=1)\n", + " inD = gpd.GeoDataFrame(inD, geometry=inD_geom, crs=crs)\n", + " \n", + " if write_out:\n", + " inD.to_file(out_file, driver=\"GeoJSON\") \n", + " return(inD)\n", + "\n", + "#res = read_geog(ea_files[0], \"latdd_corrected\", \"londd_corrected\")\n", + "#res = read_geog(ea_files[1], \"lat\", \"lon\")\n", + "#res = read_geog(ea_files[2], \"latitude\", \"longitude\")\n", + "#res = read_geog(ea_files[3], \"latitude\", \"longitude\")\n", + "#res = read_geog(ea_files[4], \"lat_mean\", \"long_mean\")\n", + "#res = read_geog(ea_files[5], \"latdd_corrected\", \"londd_corrected\")\n", + "#res = read_geog(ea_files[6], \"latdd_corrected\", \"londd_corrected\")\n", + "#res = read_geog(ea_files[7], \"lat_modified\",\"lon_modified\")\n", + "#res = read_geog(ea_files[8], \"lat_corrected\", \"lon_corrected\")\n", + "#res = read_geog(ea_files[9], \"lat_corrected\", \"lon_corrected\")\n", + "res = read_geog(ea_files[-1], \"latDD_corrected\", \"lonDD_corrected\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run individual counties" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "importlib.reload(helper)\n", + "# Process GHA\n", + "iso3 = \"GHA\"\n", + "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", + "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", + "pop_2018_un = os.path.join(local_path, \"%s_ppp_2017_UNadj.tif\" % iso3.lower())\n", + "pop_2015_con = os.path.join(local_path, \"ppp_prj_2015_%s_UNadj.tif\" % iso3)\n", + "pop_2018_con = os.path.join(local_path, \"ppp_prj_2017_%s_UNadj.tif\" % iso3)\n", + "\n", + "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", + " [pop_2018_un, \"%s_upo17.tif\" % iso3.lower()], \n", + " [pop_2015_con, \"%s_cpo15.tif\" % iso3.lower()], \n", + " [pop_2018_con, \"%s_cpo17.tif\" % iso3.lower()]]\n", + "output_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming\" % iso3\n", + "ea_file = os.path.join(output_folder, 'FINAL_EAS.shp')\n", + "\n", + "#nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)\n", + "#nu.check_no_data(output_folder)\n", + "pp_urban = nu.calc_pp_urban(os.path.join(output_folder, \"ghana\"), \"%s_gpo.tif\" % iso3.lower(), ea_file, output_folder)\n", + "pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f\"{iso3}_DB_UrbanPopulation.csv\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nu.calculate_urban?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "importlib.reload(nu)\n", + "# Process COD\n", + "iso3 = \"COD\"\n", + "local_path = \"/home/public/Data/COUNTRY/{country}/WORLDPOP/\".format(country=iso3)\n", + "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", + "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()]]\n", + "output_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming\" % iso3\n", + "ea_file = ''\n", + "#ea_file = os.path.join(output_folder, \"admin3\", \"Ethiopia_pti_admin3.shp\")\n", + "#ea_file = os.path.join(output_folder, \"Kebeles\", \"all_kebeles.shp\")\n", + "#ea_file = os.path.join(output_folder, \"gadm36_ETH_2.shp\")\n", + "\n", + "nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder, small=runSmall, km=runLarge,include_ghsl_h20=False)\n", + "#pp_urban = nu.calc_pp_urban(os.path.join(output_folder, \"ethiopia\"), \"%s_gpo.tif\" % iso3.lower(), ea_file, output_folder)\n", + "#pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f\"{iso3}_DB_UrbanPopulation_admin3.csv\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Process ETH\n", + "iso3 = \"ETH\"\n", + "local_path = \"/home/public/Data/COUNTRY/{country}/WORLDPOP/\".format(country=iso3)\n", + "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", + "pop_2018_un = os.path.join(local_path, \"%s_ppp_2016_UNadj.tif\" % iso3.lower())\n", + "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", + " [pop_2018_un, \"%s_upo16.tif\" % iso3.lower()]]\n", + "output_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming\" % iso3\n", + "ea_file = os.path.join(output_folder, \"admin3\", \"Ethiopia_pti_admin3.shp\")\n", + "#ea_file = os.path.join(output_folder, \"Kebeles\", \"all_kebeles.shp\")\n", + "#ea_file = os.path.join(output_folder, \"gadm36_ETH_2.shp\")\n", + "\n", + "#nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder, small=runSmall, km=runLarge)\n", + "pp_urban = nu.calc_pp_urban(os.path.join(output_folder, \"ethiopia\"), \"%s_gpo.tif\" % iso3.lower(), ea_file, output_folder)\n", + "pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f\"{iso3}_DB_UrbanPopulation_admin3.csv\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "input_file = os.path.join(output_folder, \"HBS_GPS.csv\")\n", + "pop_tiffs = [\"eth_gpo.tif\", \"eth_upo15.tif\", 'eth_upo16.tif']\n", + "all_tiffs = []\n", + "base_folder = os.path.join(output_folder, \"FINAL_STANDARD\")\n", + "base_folder_1km = os.path.join(output_folder, \"FINAL_STANDARD_1KM\")\n", + "for pFile in pop_tiffs:\n", + " all_tiffs.append(os.path.join(base_folder, pFile))\n", + " all_tiffs.append(os.path.join(base_folder_1km, pFile.replace(\"eth\", \"eth1k\"))) \n", + "\n", + "# Read in ETH HH locations, clean\n", + "inD = pd.read_csv(input_file)\n", + "inD = inD.loc[~inD['latDD_corrected'].isnull()]\n", + "inD = inD.loc[~inD['lonDD_corrected'].isnull()]\n", + "geoms = [Point(row['lonDD_corrected'], row['latDD_corrected']) for idx, row in inD.iterrows()]\n", + "inD = gpd.GeoDataFrame(inD, geometry=geoms, crs={'init':'epsg:4326'})\n", + "# Calculate point urbanization for degree of urbanization\n", + "out_file = os.path.join(output_folder, f\"{iso3}_DoU_Urban.csv\")\n", + "nu.point_urban_summaries(inD, all_tiffs, out_file)\n", + "# Calculate point urbanization for PP urban\n", + "out_file = os.path.join(output_folder, f\"{iso3}_DB_Urban.csv\")\n", + "in_folder = os.path.join(output_folder, \"ethiopia\")\n", + "pop_tiffs = [os.path.join(in_folder, x) for x in os.listdir(in_folder)]\n", + "nu.pp_point_urban_summaries(inD, pop_tiffs, out_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "importlib.reload(helper)\n", + "# Process COL\n", + "iso3 = \"COL\"\n", + "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", + "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", + "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()]]\n", + "output_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming\" % iso3\n", + "#ea_file = os.path.join(output_folder, 'MGN2020_RUR_SECCION', 'MGN_RUR_SECCION.shp')\n", + "ea_file = os.path.join(output_folder, 'MGN2020_URB_SECCION', 'MGN_URB_SECCION.shp')\n", + "#nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)\n", + "pp_urban = nu.calc_pp_urban(os.path.join(output_folder, \"colombia\"), \"%s_gpo.tif\" % iso3.lower(), ea_file, output_folder)\n", + "pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f\"{iso3}_DB_UrbanPopulation.csv\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "importlib.reload(helper)\n", + "# Process EGY\n", + "iso3 = \"EGY\"\n", + "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", + "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", + "pop_2018_un = os.path.join(local_path, \"%s_ppp_2013_UNadj.tif\" % iso3.lower())\n", + "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", + " [pop_2018_un, \"%s_upo16.tif\" % iso3.lower()]]\n", + "output_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming\" % iso3\n", + "\n", + "ea_file = os.path.join(output_folder, \"EGY_adm3.shp\")\n", + "\n", + "#nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)\n", + "pp_urban = nu.calc_pp_urban(os.path.join(output_folder, \"egypt\"), \"%s_gpo.tif\" % iso3.lower(), ea_file, output_folder)\n", + "pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f\"{iso3}_DB_UrbanPopulation.csv\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "importlib.reload(helper)\n", + "# Process AGO\n", + "iso3 = \"AGO\"\n", + "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", + "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", + "pop_2018_un = os.path.join(local_path, \"%s_ppp_2018_UNadj.tif\" % iso3.lower())\n", + "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", + " [pop_2018_un, \"%s_upo18.tif\" % iso3.lower()]]\n", + "output_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming\" % iso3\n", + "ea_file = os.path.join(output_folder, 'admin', 'bairros.shp')\n", + "\n", + "#nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)\n", + "pp_urban = nu.calc_pp_urban(os.path.join(output_folder, \"angola\"), \"%s_gpo.tif\" % iso3.lower(), ea_file, output_folder)\n", + "pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f\"{iso3}_DB_UrbanPopulation.csv\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "importlib.reload(novelUrbanization)\n", + "# Process BGD\n", + "iso3 = \"BGD\"\n", + "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", + "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", + "pop_2018_un = os.path.join(local_path, \"%s_ppp_2018_UNadj.tif\" % iso3.lower())\n", + "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", + " [pop_2018_un, \"%s_upo18.tif\" % iso3.lower()]]\n", + "output_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming\" % iso3\n", + "ea_file = os.path.join(output_folder, 'mauza11_reprojected.shp')\n", + "\n", + "#nu.calculate_urban(iso3, inG, inG2, pop_files, '', small=runSmall, km=runLarge)\n", + "pp_urban = nu.calc_pp_urban(os.path.join(output_folder, \"bangladesh\"), \"%s_gpo.tif\" % iso3.lower(), ea_file, output_folder)\n", + "pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f\"{iso3}_DB_UrbanPopulation.csv\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "importlib.reload(helper)\n", + "# Process VNM\n", + "iso3 = \"VNM\"\n", + "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", + "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", + "pop_2018_un = os.path.join(local_path, \"%s_ppp_2018_UNadj.tif\" % iso3.lower())\n", + "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", + " [pop_2018_un, \"%s_upo18.tif\" % iso3.lower()]]\n", + "output_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming\" % iso3\n", + "ea_file = os.path.join(output_folder, 'VN_communes2008.shp')\n", + "\n", + "#nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)\n", + "pp_urban = nu.calc_pp_urban(os.path.join(output_folder, \"vietnam\"), \"%s_gpo.tif\" % iso3.lower(), ea_file, output_folder)\n", + "pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f\"{iso3}_DB_UrbanPopulation.csv\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "importlib.reload(helper)\n", + "# Process TZA\n", + "iso3 = \"TZA\"\n", + "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/TZA_2015_2018\".format(country=iso3)\n", + "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", + "pop_2018_un = os.path.join(local_path, \"%s_ppp_2018_UNadj.tif\" % iso3.lower())\n", + "pop_2015_con = os.path.join(local_path, \"ppp_prj_2015_%s_UNadj.tif\" % iso3)\n", + "pop_2018_con = os.path.join(local_path, \"ppp_prj_2018_%s_UNadj.tif\" % iso3)\n", + "\n", + "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", + " [pop_2018_un, \"%s_upo18.tif\" % iso3.lower()], \n", + " [pop_2015_con, \"%s_cpo15.tif\" % iso3.lower()], \n", + " [pop_2018_con, \"%s_cpo18.tif\" % iso3.lower()]]\n", + "\n", + "output_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming\" % iso3\n", + "ea_file = os.path.join(output_folder, \"villages\", \"tzvillage.shp\")\n", + "\n", + "nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder, small=runSmall, km=runLarge)\n", + "pp_urban = nu.calc_pp_urban(os.path.join(output_folder, \"tanzania\"), \"%s_gpo.tif\" % iso3.lower(), ea_file, output_folder)\n", + "pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f\"{iso3}_DB_UrbanPopulation_village.csv\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Process point location analysis\n", + "input_file = os.path.join(output_folder, \"sample_imp.csv\")\n", + "inD = pd.read_csv(input_file)\n", + "geoms = [Point(row['gps_imp_lo'], row['gps_imp_la']) for idx, row in inD.iterrows()]\n", + "inD = gpd.GeoDataFrame(inD, geometry=geoms, crs={'init':'epsg:4326'})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Summarize Urban definitions\n", + "base_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/TZA_URBAN_DATA_new_naming/FINAL_STANDARD/\"\n", + "base_folder_1km = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/TZA_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/\"\n", + "pop_tiffs = ['tza_upo15.tif','tza_upo18.tif','tza_cpo15.tif','tza_cpo18.tif','tza_gpo.tif']\n", + "final_tiffs = []\n", + "for p in pop_tiffs:\n", + " final_tiffs.append(os.path.join(base_folder, p))\n", + " final_tiffs.append(os.path.join(base_folder_1km, p.replace(\"tza\", \"tza1k\")))\n", + " \n", + "out_file = input_file.replace(\".csv\", \"_urban.csv\")\n", + "nu.point_urban_summaries(inD, final_tiffs, out_file)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Calculate point urbanization for PP urban\n", + "out_file = input_file.replace(\".csv\", \"_urban_PP.csv\")\n", + "in_folder = os.path.join(output_folder, \"tanzania\")\n", + "pop_tiffs = [os.path.join(in_folder, x) for x in os.listdir(in_folder)]\n", + "nu.pp_point_urban_summaries(inD, pop_tiffs, out_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Compile and copy mapping data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "countries = {'AGO':'angola','BGD':'bangladesh','EGY':'egypt','ETH':'ethiopia',\n", + " 'GHA':'ghana','TZA':'tanzania','VNM':'vietnam'}\n", + "for iso3 in countries.keys():\n", + " out_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Mapping/URBAN_Data\"\n", + " data_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming/\" % iso3\n", + " dou_folder = os.path.join(data_folder, \"FINAL_STANDARD\")\n", + " db_folder = os.path.join(data_folder, countries[iso3])\n", + " \n", + " dou_urban = os.path.join(dou_folder, f'{iso3.lower()}_upo15_urban.tif')\n", + " dou_urban_hd = os.path.join(dou_folder, f'{iso3.lower()}_upo15_urban_hd.tif')\n", + " \n", + " db_urban_cc = os.path.join(db_folder, f\"{iso3.lower()}_upo15d20b2000_cc.tif\")\n", + " db_urban_co = os.path.join(db_folder, f\"{iso3.lower()}_upo15d20b2000_co.tif\")\n", + " db_urban_ur = os.path.join(db_folder, f\"{iso3.lower()}_upo15d20b2000_ur.tif\")\n", + " \n", + " for uFile in [dou_urban, dou_urban_hd, db_urban_cc, db_urban_co, db_urban_ur]:\n", + " print(f'{iso3}: {os.path.exists(uFile)}')\n", + " out_file = os.path.join(out_folder, os.path.basename(uFile))\n", + " shutil.copy(uFile, out_file)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "shutil.copy?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Compile zonal results" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AGO: AGO_HH_GPS_WB_URBAN.csv - 2022-06-07 11:41:21.951099\n", + "AGO: AGO_HH_GPS_PP_URBAN.csv - 2022-06-07 11:41:32.765155\n", + "AGO: AGO_EA_PP_URBAN.csv - 2022-06-07 14:18:35.263645\n", + "AGO: AGO_EA_WB_URBAN.csv - 2022-06-07 21:06:52.371177\n", + "ETH: ETH_HH_GPS_WB_URBAN.csv - 2022-06-06 14:21:00.484850\n", + "ETH: ETH_HH_GPS_PP_URBAN.csv - 2022-06-06 14:24:16.564839\n", + "ETH: ETH_EA_WB_URBAN.csv - 2022-06-07 21:12:17.944850\n", + "ETH: ETH_EA_PP_URBAN.csv - 2022-06-07 21:14:26.158509\n", + "BFA: BFA_HH_GPS_WB_URBAN.csv - 2022-06-06 14:20:44.531770\n", + "BFA: BFA_HH_GPS_PP_URBAN.csv - 2022-06-06 14:20:51.765806\n", + "BFA: BFA_EA_WB_URBAN.csv - 2022-06-07 21:12:18.841855\n", + "BFA: BFA_EA_PP_URBAN.csv - 2022-06-07 21:15:21.464793\n", + "CIV: CIV_HH_GPS_WB_URBAN.csv - 2022-06-06 14:17:55.492918\n", + "CIV: CIV_HH_GPS_PP_URBAN.csv - 2022-06-06 14:18:07.917980\n", + "CIV: CIV_EA_WB_URBAN.csv - 2022-06-07 21:11:58.558751\n", + "CIV: CIV_EA_PP_URBAN.csv - 2022-06-07 21:12:30.087913\n", + "GAB: GAB_HH_GPS_WB_URBAN.csv - 2022-06-06 14:19:16.228325\n", + "GAB: GAB_HH_GPS_PP_URBAN.csv - 2022-06-06 14:20:39.876746\n", + "GAB: GAB_EA_WB_URBAN.csv - 2022-06-07 21:12:05.425786\n", + "GAB: GAB_EA_PP_URBAN.csv - 2022-06-07 21:13:28.423212\n", + "GIN: GIN_HH_GPS_WB_URBAN.csv - 2022-06-06 14:18:20.003041\n", + "GIN: GIN_HH_GPS_PP_URBAN.csv - 2022-06-06 14:18:28.878086\n", + "GIN: GIN_EA_WB_URBAN.csv - 2022-06-07 21:12:02.712772\n", + "GIN: GIN_EA_PP_URBAN.csv - 2022-06-07 21:12:54.512038\n", + "GNB: GNB_HH_GPS_WB_URBAN.csv - 2022-06-06 14:17:29.182785\n", + "GNB: GNB_HH_GPS_PP_URBAN.csv - 2022-06-06 14:17:35.614818\n", + "GNB: GNB_EA_WB_URBAN.csv - 2022-06-07 21:11:55.043733\n", + "GNB: GNB_EA_PP_URBAN.csv - 2022-06-07 21:12:08.672803\n", + "LSO: LSO_HH_GPS_WB_URBAN.csv - 2022-06-06 14:18:00.892945\n", + "LSO: LSO_HH_GPS_PP_URBAN.csv - 2022-06-06 14:18:47.367179\n", + "LSO: LSO_EA_WB_URBAN.csv - 2022-06-07 21:11:57.680746\n", + "LSO: LSO_EA_PP_URBAN.csv - 2022-06-07 21:12:25.628890\n", + "MLI: MLI_HH_GPS_WB_URBAN.csv - 2022-06-06 14:20:35.669725\n", + "MLI: MLI_HH_GPS_PP_URBAN.csv - 2022-06-06 14:21:43.331066\n", + "MLI: MLI_EA_WB_URBAN.csv - 2022-06-07 21:12:14.770834\n", + "MLI: MLI_EA_PP_URBAN.csv - 2022-06-07 21:14:47.788620\n", + "MWI: MWI_HH_GPS_WB_URBAN.csv - 2022-06-06 14:19:24.706367\n", + "MWI: MWI_HH_GPS_PP_URBAN.csv - 2022-06-06 14:21:29.197995\n", + "MWI: MWI_EA_WB_URBAN.csv - 2022-06-07 21:12:07.235795\n", + "MWI: MWI_EA_PP_URBAN.csv - 2022-06-07 21:13:23.784189\n", + "NER: NER_HH_GPS_WB_URBAN.csv - 2022-06-06 14:18:59.272239\n", + "NER: NER_HH_GPS_PP_URBAN.csv - 2022-06-06 14:19:06.237274\n", + "NER: NER_EA_WB_URBAN.csv - 2022-06-07 21:12:04.380781\n", + "NER: NER_EA_PP_URBAN.csv - 2022-06-07 21:13:36.988256\n", + "SEN: SEN_HH_GPS_WB_URBAN.csv - 2022-06-06 14:17:56.650924\n", + "SEN: SEN_HH_GPS_PP_URBAN.csv - 2022-06-06 14:18:04.464963\n", + "SEN: SEN_EA_WB_URBAN.csv - 2022-06-07 21:11:59.892758\n", + "SEN: SEN_EA_PP_URBAN.csv - 2022-06-07 21:12:31.368919\n", + "TCD: TCD_HH_GPS_WB_URBAN.csv - 2022-06-06 14:17:56.695924\n", + "TCD: TCD_HH_GPS_PP_URBAN.csv - 2022-06-06 14:18:05.604969\n", + "TCD: TCD_EA_WB_URBAN.csv - 2022-06-07 21:11:53.901727\n", + "TCD: TCD_EA_PP_URBAN.csv - 2022-06-07 21:12:31.185918\n", + "UGA: UGA_HH_GPS_WB_URBAN.csv - 2022-06-06 14:34:16.771864\n", + "UGA: UGA_HH_GPS_PP_URBAN.csv - 2022-06-06 14:34:20.935885\n", + "UGA: UGA_EA_WB_URBAN.csv - 2022-06-07 21:14:56.845667\n", + "UGA: UGA_EA_PP_URBAN.csv - 2022-06-07 21:27:56.608654\n" + ] + } + ], + "source": [ + "in_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/\"\n", + "out_folder = os.path.join(in_folder, \"URBAN_ZONAL_RESULTS\")\n", + "if not os.path.exists(out_folder):\n", + " os.makedirs(out_folder)\n", + " \n", + "for root, dirs, files in os.walk(in_folder):\n", + " if \"URBAN_DATA_new_naming\" in root: \n", + " country = os.path.basename(root).split(\"_\")[0] \n", + " if country in nu.EA_DEFS.keys():\n", + " for f in files:\n", + " if (\"EA_PP_URBAN\" in f) | (\"EA_WB_URBAN\" in f) | (\"HH_GPS\" in f):\n", + " fName = pathlib.Path(os.path.join(root, f)) \n", + " date = datetime.datetime.fromtimestamp(fName.stat().st_mtime)\n", + " if datetime.datetime(2021,6,1) < date:\n", + " print(f'{country}: {f} - {date}') \n", + " else:\n", + " print(f'***OLD: {country}: {f} - {date}') \n", + " shutil.copy(os.path.join(root, f), os.path.join(out_folder, f))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "AGO: URBAN_ADMIN2_STATS_COMPILED.csv\n", + "AGO: URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "AGO: AGO_URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "AGO: AGO_URBAN_ADMIN2_STATS_COMPILED.csv\n", + "ETH: URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "ETH: ETH_URBAN_ADMIN2_STATS_COMPILED.csv\n", + "ETH: URBAN_ADMIN2_STATS_COMPILED.csv\n", + "BFA: BFA_URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "BFA: BFA_URBAN_ADMIN2_STATS_COMPILED.csv\n", + "CIV: URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "CIV: URBAN_ADMIN2_STATS_COMPILED.csv\n", + "CIV: CIV_URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "CIV: CIV_URBAN_ADMIN2_STATS_COMPILED.csv\n", + "GAB: URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "GAB: URBAN_ADMIN2_STATS_COMPILED.csv\n", + "GAB: GAB_URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "GAB: GAB_URBAN_ADMIN2_STATS_COMPILED.csv\n", + "GIN: GIN_URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "GIN: GIN_URBAN_ADMIN2_STATS_COMPILED.csv\n", + "GNB: URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "GNB: URBAN_ADMIN2_STATS_COMPILED.csv\n", + "GNB: GNB_URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "GNB: GNB_URBAN_ADMIN2_STATS_COMPILED.csv\n", + "LSO: URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "LSO: URBAN_ADMIN2_STATS_COMPILED.csv\n", + "LSO: LSO_URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "LSO: LSO_URBAN_ADMIN2_STATS_COMPILED.csv\n", + "MLI: URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "MLI: URBAN_ADMIN2_STATS_COMPILED.csv\n", + "MLI: MLI_URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "MLI: MLI_URBAN_ADMIN2_STATS_COMPILED.csv\n", + "MWI: URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "MWI: URBAN_ADMIN2_STATS_COMPILED.csv\n", + "MWI: MWI_URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "MWI: MWI_URBAN_ADMIN2_STATS_COMPILED.csv\n", + "NER: URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "NER: URBAN_ADMIN2_STATS_COMPILED.csv\n", + "NER: NER_URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "NER: NER_URBAN_ADMIN2_STATS_COMPILED.csv\n", + "SEN: URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "SEN: URBAN_ADMIN2_STATS_COMPILED.csv\n", + "SEN: SEN_URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "SEN: SEN_URBAN_ADMIN2_STATS_COMPILED.csv\n", + "TCD: TCD_URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "TCD: TCD_URBAN_ADMIN2_STATS_COMPILED.csv\n", + "UGA: URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "UGA: URBAN_ADMIN2_STATS_COMPILED.csv\n", + "UGA: UGA_URBAN_ADMIN2_STATS_COMPILED_1k.csv\n", + "UGA: UGA_URBAN_ADMIN2_STATS_COMPILED.csv\n" + ] + } + ], + "source": [ + "# Delete all zonal stats\n", + "for root, dirs, files in os.walk(in_folder):\n", + " if \"URBAN_DATA_new_naming\" in root: \n", + " country = os.path.basename(root).split(\"_\")[0] \n", + " if country in nu.EA_DEFS.keys():\n", + " for f in files:\n", + " if (\"URBAN_COMMUNE_STATS\" in f) | (\"URBAN_ADMIN2\" in f):\n", + " print(f'{country}: {f}')\n", + " os.remove(os.path.join(root, f))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Generating zip commands" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Delete existing files\n", + "in_folder = \"/home/wb411133/temp\"\n", + "for root, dirs, files in os.walk(in_folder):\n", + " for d in dirs:\n", + " if (d == \"FINAL_STANDARD\") or (d == \"FINAL_STANDARD_1KM\"):\n", + " cur_dir = os.path.join(root, d)\n", + " print(\"zip -r {out_file} {infolder}\".format(\n", + " out_file = \"%s_%s.zip\" % (cur_dir.split(\"/\")[-2].split(\"_\")[0], cur_dir.split(\"_\")[-1]),\n", + " infolder = os.path.join(os.path.basename(os.path.dirname(cur_dir)), os.path.basename(cur_dir))))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Debugging" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "file1 = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/LSO_lso_cpo20.tif.csv\"\n", + "file2 = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/LSO_lso_gpo.tif.csv\"\n", + "\n", + "inD1 = pd.read_csv(file1, index_col=0)\n", + "inD2 = pd.read_csv(file2, index_col=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TOTALPOP_lso1k_upo15.tif_SUMTOTALPOP_lso1k_upo15.tif_MINTOTALPOP_lso1k_upo15.tif_MAXTOTALPOP_lso1k_upo15.tif_MEAN_lso1k_upo15_urban.tif_SUM_lso1k_upo15_urban.tif_MIN_lso1k_upo15_urban.tif_MAX_lso1k_upo15_urban.tif_MEAN_lso1k_upo15_urban_hd.tif_SUM_lso1k_upo15_urban_hd.tif_MIN...TOTALPOP_lso1k_cpo20.tif_MAXTOTALPOP_lso1k_cpo20.tif_MEAN_lso1k_cpo20_urban.tif_SUM_lso1k_cpo20_urban.tif_MIN_lso1k_cpo20_urban.tif_MAX_lso1k_cpo20_urban.tif_MEAN_lso1k_cpo20_urban_hd.tif_SUM_lso1k_cpo20_urban_hd.tif_MIN_lso1k_cpo20_urban_hd.tif_MAX_lso1k_cpo20_urban_hd.tif_MEAN
013202.0133616.617668223.39756442.3141450.0-0.0-0.00.00.0-0.0...510.09518832.1184040.0-0.0-0.00.00.0-0.0-0.00.0
18109.83814615.672872110.43479147.9872080.00.00.00.00.00.0...499.48213936.6019160.00.00.00.00.00.00.00.0
224401.3141214.62893281.78115220.0503810.0-0.0-0.00.00.0-0.0...377.77764818.5902650.0-0.0-0.00.00.0-0.0-0.00.0
322974.2534806.252507242.56207474.3503350.00.00.00.00.00.0...379.54394068.9484190.00.00.00.00.00.00.00.0
419419.33252713.074320111.64587043.9351410.00.00.00.00.00.0...316.08367542.4692490.00.00.00.00.00.00.00.0
\n", + "

5 rows × 48 columns

\n", + "
" + ], + "text/plain": [ + " TOTALPOP_lso1k_upo15.tif_SUM TOTALPOP_lso1k_upo15.tif_MIN \\\n", + "0 13202.013361 6.617668 \n", + "1 8109.838146 15.672872 \n", + "2 24401.314121 4.628932 \n", + "3 22974.253480 6.252507 \n", + "4 19419.332527 13.074320 \n", + "\n", + " TOTALPOP_lso1k_upo15.tif_MAX TOTALPOP_lso1k_upo15.tif_MEAN \\\n", + "0 223.397564 42.314145 \n", + "1 110.434791 47.987208 \n", + "2 81.781152 20.050381 \n", + "3 242.562074 74.350335 \n", + "4 111.645870 43.935141 \n", + "\n", + " _lso1k_upo15_urban.tif_SUM _lso1k_upo15_urban.tif_MIN \\\n", + "0 0.0 -0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 -0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 \n", + "\n", + " _lso1k_upo15_urban.tif_MAX _lso1k_upo15_urban.tif_MEAN \\\n", + "0 -0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 -0.0 0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 \n", + "\n", + " _lso1k_upo15_urban_hd.tif_SUM _lso1k_upo15_urban_hd.tif_MIN ... \\\n", + "0 0.0 -0.0 ... \n", + "1 0.0 0.0 ... \n", + "2 0.0 -0.0 ... \n", + "3 0.0 0.0 ... \n", + "4 0.0 0.0 ... \n", + "\n", + " TOTALPOP_lso1k_cpo20.tif_MAX TOTALPOP_lso1k_cpo20.tif_MEAN \\\n", + "0 510.095188 32.118404 \n", + "1 499.482139 36.601916 \n", + "2 377.777648 18.590265 \n", + "3 379.543940 68.948419 \n", + "4 316.083675 42.469249 \n", + "\n", + " _lso1k_cpo20_urban.tif_SUM _lso1k_cpo20_urban.tif_MIN \\\n", + "0 0.0 -0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 -0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 \n", + "\n", + " _lso1k_cpo20_urban.tif_MAX _lso1k_cpo20_urban.tif_MEAN \\\n", + "0 -0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 -0.0 0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 \n", + "\n", + " _lso1k_cpo20_urban_hd.tif_SUM _lso1k_cpo20_urban_hd.tif_MIN \\\n", + "0 0.0 -0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 -0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 \n", + "\n", + " _lso1k_cpo20_urban_hd.tif_MAX _lso1k_cpo20_urban_hd.tif_MEAN \n", + "0 -0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 -0.0 0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 \n", + "\n", + "[5 rows x 48 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inD1.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TOTALPOP_lso1k_gpo.tif_SUMTOTALPOP_lso1k_gpo.tif_MINTOTALPOP_lso1k_gpo.tif_MAXTOTALPOP_lso1k_gpo.tif_MEAN_lso1k_gpo_urban.tif_SUM_lso1k_gpo_urban.tif_MIN_lso1k_gpo_urban.tif_MAX_lso1k_gpo_urban.tif_MEAN_lso1k_gpo_urban_hd.tif_SUM_lso1k_gpo_urban_hd.tif_MIN_lso1k_gpo_urban_hd.tif_MAX_lso1k_gpo_urban_hd.tif_MEAN
01389.2176250.0676.3883064.4526210.000000-0.0-0.0000000.0000000.0-0.0-0.00.0
1742.8562620.0742.8562624.3955990.0000000.00.0000000.0000000.00.00.00.0
227027.9417920.07224.13720722.20866214351.857422-0.07224.13720711.6303540.0-0.0-0.00.0
324098.4345950.011767.49804777.98846120004.9467770.011767.49804760.9906910.00.00.00.0
425709.0781250.023150.86523458.16533523150.8652340.023150.86523452.3775230.00.00.00.0
\n", + "
" + ], + "text/plain": [ + " TOTALPOP_lso1k_gpo.tif_SUM TOTALPOP_lso1k_gpo.tif_MIN \\\n", + "0 1389.217625 0.0 \n", + "1 742.856262 0.0 \n", + "2 27027.941792 0.0 \n", + "3 24098.434595 0.0 \n", + "4 25709.078125 0.0 \n", + "\n", + " TOTALPOP_lso1k_gpo.tif_MAX TOTALPOP_lso1k_gpo.tif_MEAN \\\n", + "0 676.388306 4.452621 \n", + "1 742.856262 4.395599 \n", + "2 7224.137207 22.208662 \n", + "3 11767.498047 77.988461 \n", + "4 23150.865234 58.165335 \n", + "\n", + " _lso1k_gpo_urban.tif_SUM _lso1k_gpo_urban.tif_MIN \\\n", + "0 0.000000 -0.0 \n", + "1 0.000000 0.0 \n", + "2 14351.857422 -0.0 \n", + "3 20004.946777 0.0 \n", + "4 23150.865234 0.0 \n", + "\n", + " _lso1k_gpo_urban.tif_MAX _lso1k_gpo_urban.tif_MEAN \\\n", + "0 -0.000000 0.000000 \n", + "1 0.000000 0.000000 \n", + "2 7224.137207 11.630354 \n", + "3 11767.498047 60.990691 \n", + "4 23150.865234 52.377523 \n", + "\n", + " _lso1k_gpo_urban_hd.tif_SUM _lso1k_gpo_urban_hd.tif_MIN \\\n", + "0 0.0 -0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 -0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 \n", + "\n", + " _lso1k_gpo_urban_hd.tif_MAX _lso1k_gpo_urban_hd.tif_MEAN \n", + "0 -0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 -0.0 0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inD2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "columns overlap but no suffix specified: Index(['TOTALPOP_lso1k_gpo.tif_SUM', 'TOTALPOP_lso1k_gpo.tif_MIN',\n 'TOTALPOP_lso1k_gpo.tif_MAX', 'TOTALPOP_lso1k_gpo.tif_MEAN',\n '_lso1k_gpo_urban.tif_SUM', '_lso1k_gpo_urban.tif_MIN',\n '_lso1k_gpo_urban.tif_MAX', '_lso1k_gpo_urban.tif_MEAN',\n '_lso1k_gpo_urban_hd.tif_SUM', '_lso1k_gpo_urban_hd.tif_MIN',\n '_lso1k_gpo_urban_hd.tif_MAX', '_lso1k_gpo_urban_hd.tif_MEAN'],\n dtype='object')", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0minD1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minD2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.conda/envs/ee/lib/python3.9/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mjoin\u001b[0;34m(self, other, on, how, lsuffix, rsuffix, sort)\u001b[0m\n\u001b[1;32m 8108\u001b[0m \u001b[0;36m5\u001b[0m \u001b[0mK5\u001b[0m \u001b[0mA5\u001b[0m \u001b[0mNaN\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8109\u001b[0m \"\"\"\n\u001b[0;32m-> 8110\u001b[0;31m return self._join_compat(\n\u001b[0m\u001b[1;32m 8111\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mon\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhow\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mhow\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlsuffix\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlsuffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrsuffix\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrsuffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msort\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msort\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8112\u001b[0m )\n", + "\u001b[0;32m~/.conda/envs/ee/lib/python3.9/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_join_compat\u001b[0;34m(self, other, on, how, lsuffix, rsuffix, sort)\u001b[0m\n\u001b[1;32m 8133\u001b[0m \u001b[0msort\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msort\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8134\u001b[0m )\n\u001b[0;32m-> 8135\u001b[0;31m return merge(\n\u001b[0m\u001b[1;32m 8136\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8137\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.conda/envs/ee/lib/python3.9/site-packages/pandas/core/reshape/merge.py\u001b[0m in \u001b[0;36mmerge\u001b[0;34m(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0mvalidate\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalidate\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m )\n\u001b[0;32m---> 89\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 90\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.conda/envs/ee/lib/python3.9/site-packages/pandas/core/reshape/merge.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 684\u001b[0m \u001b[0mjoin_index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mleft_indexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mright_indexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_join_info\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 685\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 686\u001b[0;31m llabels, rlabels = _items_overlap_with_suffix(\n\u001b[0m\u001b[1;32m 687\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mleft\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mright\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msuffixes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 688\u001b[0m )\n", + "\u001b[0;32m~/.conda/envs/ee/lib/python3.9/site-packages/pandas/core/reshape/merge.py\u001b[0m in \u001b[0;36m_items_overlap_with_suffix\u001b[0;34m(left, right, suffixes)\u001b[0m\n\u001b[1;32m 2176\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2177\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mlsuffix\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mrsuffix\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2178\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"columns overlap but no suffix specified: {to_rename}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2179\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2180\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mrenamer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msuffix\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: columns overlap but no suffix specified: Index(['TOTALPOP_lso1k_gpo.tif_SUM', 'TOTALPOP_lso1k_gpo.tif_MIN',\n 'TOTALPOP_lso1k_gpo.tif_MAX', 'TOTALPOP_lso1k_gpo.tif_MEAN',\n '_lso1k_gpo_urban.tif_SUM', '_lso1k_gpo_urban.tif_MIN',\n '_lso1k_gpo_urban.tif_MAX', '_lso1k_gpo_urban.tif_MEAN',\n '_lso1k_gpo_urban_hd.tif_SUM', '_lso1k_gpo_urban_hd.tif_MIN',\n '_lso1k_gpo_urban_hd.tif_MAX', '_lso1k_gpo_urban_hd.tif_MEAN'],\n dtype='object')" + ] + } + ], + "source": [ + "inD1.join(inD2)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inD2.columns in inD1.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['TOTALPOP_lso1k_upo15.tif_SUM', 'TOTALPOP_lso1k_upo15.tif_MIN',\n", + " 'TOTALPOP_lso1k_upo15.tif_MAX', 'TOTALPOP_lso1k_upo15.tif_MEAN',\n", + " '_lso1k_upo15_urban.tif_SUM', '_lso1k_upo15_urban.tif_MIN',\n", + " '_lso1k_upo15_urban.tif_MAX', '_lso1k_upo15_urban.tif_MEAN',\n", + " '_lso1k_upo15_urban_hd.tif_SUM', '_lso1k_upo15_urban_hd.tif_MIN',\n", + " '_lso1k_upo15_urban_hd.tif_MAX', '_lso1k_upo15_urban_hd.tif_MEAN',\n", + " 'TOTALPOP_lso1k_gpo.tif_SUM', 'TOTALPOP_lso1k_gpo.tif_MIN',\n", + " 'TOTALPOP_lso1k_gpo.tif_MAX', 'TOTALPOP_lso1k_gpo.tif_MEAN',\n", + " '_lso1k_gpo_urban.tif_SUM', '_lso1k_gpo_urban.tif_MIN',\n", + " '_lso1k_gpo_urban.tif_MAX', '_lso1k_gpo_urban.tif_MEAN',\n", + " '_lso1k_gpo_urban_hd.tif_SUM', '_lso1k_gpo_urban_hd.tif_MIN',\n", + " '_lso1k_gpo_urban_hd.tif_MAX', '_lso1k_gpo_urban_hd.tif_MEAN',\n", + " 'TOTALPOP_lso1k_cpo15.tif_SUM', 'TOTALPOP_lso1k_cpo15.tif_MIN',\n", + " 'TOTALPOP_lso1k_cpo15.tif_MAX', 'TOTALPOP_lso1k_cpo15.tif_MEAN',\n", + " '_lso1k_cpo15_urban.tif_SUM', '_lso1k_cpo15_urban.tif_MIN',\n", + " '_lso1k_cpo15_urban.tif_MAX', '_lso1k_cpo15_urban.tif_MEAN',\n", + " '_lso1k_cpo15_urban_hd.tif_SUM', '_lso1k_cpo15_urban_hd.tif_MIN',\n", + " '_lso1k_cpo15_urban_hd.tif_MAX', '_lso1k_cpo15_urban_hd.tif_MEAN',\n", + " 'TOTALPOP_lso1k_cpo20.tif_SUM', 'TOTALPOP_lso1k_cpo20.tif_MIN',\n", + " 'TOTALPOP_lso1k_cpo20.tif_MAX', 'TOTALPOP_lso1k_cpo20.tif_MEAN',\n", + " '_lso1k_cpo20_urban.tif_SUM', '_lso1k_cpo20_urban.tif_MIN',\n", + " '_lso1k_cpo20_urban.tif_MAX', '_lso1k_cpo20_urban.tif_MEAN',\n", + " '_lso1k_cpo20_urban_hd.tif_SUM', '_lso1k_cpo20_urban_hd.tif_MIN',\n", + " '_lso1k_cpo20_urban_hd.tif_MAX', '_lso1k_cpo20_urban_hd.tif_MEAN'],\n", + " dtype='object')" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inD1.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['TOTALPOP_lso1k_gpo.tif_SUM', 'TOTALPOP_lso1k_gpo.tif_MIN',\n", + " 'TOTALPOP_lso1k_gpo.tif_MAX', 'TOTALPOP_lso1k_gpo.tif_MEAN',\n", + " '_lso1k_gpo_urban.tif_SUM', '_lso1k_gpo_urban.tif_MIN',\n", + " '_lso1k_gpo_urban.tif_MAX', '_lso1k_gpo_urban.tif_MEAN',\n", + " '_lso1k_gpo_urban_hd.tif_SUM', '_lso1k_gpo_urban_hd.tif_MIN',\n", + " '_lso1k_gpo_urban_hd.tif_MAX', '_lso1k_gpo_urban_hd.tif_MEAN'],\n", + " dtype='object')" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "inD2.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "pop_files = ['/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_upo15.tif',\n", + "'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_gpo.tif',\n", + "'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_cpo15.tif',\n", + "'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_cpo20.tif',\n", + "'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_gpo.tif',\n", + "'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_upo15.tif']" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_gpo.tif',\n", + " '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_upo15.tif',\n", + " '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_cpo20.tif',\n", + " '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/lso_cpo15.tif']" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(set(pop_files))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Earth Engine", + "language": "python", + "name": "ee" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Notebooks/Implementations/URB_SEAU1_NovelUrbanization/novelUrbanization.py b/Notebooks/Implementations/URB_SEAU1_NovelUrbanization/novelUrbanization.py new file mode 100755 index 0000000..c184bea --- /dev/null +++ b/Notebooks/Implementations/URB_SEAU1_NovelUrbanization/novelUrbanization.py @@ -0,0 +1,348 @@ +import sys, os, importlib, shutil, multiprocessing +import requests +import rasterio, elevation, richdem +import rasterio.warp +from rasterio import features + +import pandas as pd +import geopandas as gpd +import numpy as np + +from shapely.geometry import MultiPolygon, Polygon, box, Point + +#Import raster helpers +import GOSTRocks.rasterMisc as rMisc +from GOSTRocks.misc import tPrint + +#Import GOST urban functions +sys.path.append("../../src") +import GOST_Urban.UrbanRaster as urban +import GOST_Urban.urban_helper as helper + +importlib.reload(helper) +importlib.reload(rMisc) + +def calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder, km=True, small=True, include_ghsl_h20=True, evaluate=True): + global_landcover = "/home/public/Data/GLOBAL/LANDCOVER/GLOBCOVER/2015/ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7.tif" + global_ghspop = "/home/public/Data/GLOBAL/Population/GHS/250/GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0.tif" + global_ghspop_1k = "/home/public/Data/GLOBAL/Population/GHS/GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0.tif" + global_ghbuilt = "/home/public/Data/GLOBAL/URBAN/GHS/GHS_1K_BUILT/GHS_BUILT_LDS2014_GLOBE_R2018A_54009_1K_V1_0.tif" + global_dem_1k = "/home/public/Data/GLOBAL/ELEV/noaa_1km.tif" + ghs_smod = "/home/public/Data/GLOBAL/URBAN/GHS/GHS_SMOD/GHS_SMOD_POP2015_GLOBE_R2019A_54009_1K_V2_0.tif" + ghsl_vrt = "/home/public/Data/GLOBAL/GHSL/ghsl.vrt" + + admin2_250_stats = os.path.join(output_folder, f"{iso3}_URBAN_ADMIN2_STATS_COMPILED.csv") + commune_250_stats = os.path.join(output_folder, f"{iso3}_URBAN_COMMUNE_STATS_COMPILED.csv") + admin2_1k_stats = os.path.join(output_folder, f"{iso3}_URBAN_ADMIN2_STATS_COMPILED_1k.csv") + commune_1k_stats = os.path.join(output_folder, f"{iso3}_URBAN_COMMUNE_STATS_COMPILED_1k.csv") + + inD = inG.loc[inG['ISO3'] == iso3] + inD['geometry'] = inD['geometry'].apply(lambda x: x.buffer(500)) + inD = inD.to_crs('epsg:4326') + + inD2 = inG2.loc[inG2['ISO3'] == iso3] + inD2 = inD2.to_crs('epsg:4326') + + ### Process 1km data + if km: + xx = helper.urban_country(iso3, output_folder, inD, pop_files, + final_folder="FINAL_STANDARD_1KM", ghspop_suffix="1k") + adm2_res = os.path.join(xx.final_folder, "URBAN_ADMIN2_STATS_COMPILED.csv") + ea_res = os.path.join(xx.final_folder, "URBAN_COMMUNE_STATS_COMPILED.csv") + tPrint(f"{iso3} ***1k Extracting Global Layers") + xx.extract_layers(global_landcover, global_ghspop, global_ghspop_1k, global_ghbuilt, ghsl_vrt, ghs_smod) + tPrint(f"{iso3} ***1k Downloading and processing elevation") + xx.process_dem(global_dem=global_dem_1k) + tPrint(f"{iso3} ***1k Standardizing rasters") + xx.standardize_rasters(include_ghsl_h20) + tPrint(f"{iso3} ***1k Calculating Urban") + xx.calculate_urban() + tPrint(f"{iso3} ***1k Calculating Zonal admin2") + if not os.path.exists(admin2_1k_stats): + zonal_adm2 = xx.pop_zonal_admin(inD2) + zonal_adm2.to_csv(admin2_1k_stats) + tPrint(f"{iso3} ***1k Calculating Zonal communes") + if os.path.exists(ea_file): + inEA = gpd.read_file(ea_file) + zonal_ea = xx.pop_zonal_admin(inEA) + zonal_ea.to_csv(commune_1k_stats) + if evaluate: + tPrint(f"{iso3} ***1k Evaluating Data") + xx.evaluateOutput(admin2_1k_stats, commune_1k_stats) + + ### Process 250m data + if small: + xx = helper.urban_country(iso3, output_folder, inD, pop_files) + tPrint(f"{iso3} ***** Extracting Global Layers %s" % iso3) + xx.extract_layers(global_landcover, global_ghspop, global_ghspop_1k, global_ghbuilt, ghsl_vrt, ghs_smod) + tPrint(f"{iso3} ***** Downloading and processing elevation %s" % iso3) + xx.process_dem(global_dem=global_dem_1k) + tPrint(f"{iso3} ***** Standardizing rasters") + xx.standardize_rasters(include_ghsl_h20) + tPrint(f"{iso3} ***** Calculating Urban") + xx.calculate_urban() + tPrint(f"{iso3} ***** Calculating Zonal admin2") + if not os.path.exists(admin2_250_stats): + zonal_adm2 = xx.pop_zonal_admin(inD2) + zonal_adm2.to_csv(admin2_250_stats) + tPrint(f"{iso3} ***** Calculating Zonal communes") + if os.path.exists(ea_file): + inEA = gpd.read_file(ea_file) + zonal_ea = xx.pop_zonal_admin(inEA) + zonal_ea.to_csv(commune_250_stats) + if evaluate: + tPrint(f"{iso3} ***** Evaluating Data") + xx.evaluateOutput(admin2_250_stats, commune_250_stats) + + +def calc_pp_urban(in_folder, default_pop_file, admin_layer, output_folder, iso3=''): + ''' Summarize urbanization from Pierre-Philippe's Dartboard methodology + + INPUT + input_folder [string path] - location of dartboard urbanization + default_pop_file [string path] - default pop filename to use for urban population calculations + admin_layer [string path] - zones used to summarize population + RETURN + [geopandas dataframe] - contains total population and urban population for each shape + ''' + urban_layers = [os.path.join(in_folder, x) for x in os.listdir(in_folder) if x[-4:] == ".tif"] + if iso3 != '': + urban_layers = [x for x in urban_layers if iso3.lower() in x] + + cur_layer = urban_layers[0] + inD = gpd.read_file(admin_layer) + default_pop_1k = default_pop_file.replace(default_pop_file[:3], "%s1k" % default_pop_file[:3]) + for cur_layer in urban_layers: + #tPrint(cur_layer) + #Open and read in urban data + urban_r = rasterio.open(cur_layer) + urban_data = urban_r.read() + urban_data = (urban_data > 0).astype(urban_r.meta['dtype']) + #Extract population data + urban_layer = os.path.basename(cur_layer) + default_pop = default_pop_file + + if "1k" in urban_layer: + default_pop = default_pop_1k + pop_layer = os.path.basename(cur_layer)[:11] + pop_folder = os.path.join(output_folder, "FINAL_STANDARD_1KM") + else: + pop_layer = os.path.basename(cur_layer)[:9] + pop_folder = os.path.join(output_folder, "FINAL_STANDARD") + pop_file = os.path.join(pop_folder,"%s.tif" % pop_layer) + + if not os.path.exists(pop_file): + if "1k" in urban_layer: + default_pop = default_pop_1k + pop_layer = os.path.basename(cur_layer)[:9] + pop_folder = os.path.join(output_folder, "FINAL_STANDARD_1KM") + else: + pop_layer = os.path.basename(cur_layer)[:7] + pop_folder = os.path.join(output_folder, "FINAL_STANDARD") + pop_file = os.path.join(pop_folder,"%s.tif" % pop_layer) + + pop_r = rasterio.open(pop_file) + pop_data = pop_r.read() + pop_data = pop_data * urban_data + meta = urban_r.meta.copy() + meta.update(dtype = pop_data.dtype) + + #Calculate total population + total_pop_field = os.path.basename(pop_file).replace(".tif", "") + if not total_pop_field in inD.columns: + res = rMisc.zonalStats(inD, pop_r, reProj=True, minVal=0) + res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN']) + inD[total_pop_field] = res['SUM'] + + #Calculate urban population + with rMisc.create_rasterio_inmemory(meta, pop_data) as pop_r: + res = rMisc.zonalStats(inD, pop_r, reProj=True, minVal=0) + res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN']) + + inD[os.path.basename(cur_layer).replace(".tif","")] = res['SUM'] + return(inD) + +def check_no_data(in_folder): + ''' loop through all the tif files in the FINAL folders and calculate the number of no-data cells + ''' + for root, dirs, files in os.walk(in_folder): + if "FINAL" in root: + for f in files: + if (not "NO_DATA" in f) and (not 'urban' in f): + if f[-4:] == ".tif": + cur_file = os.path.join(root, f) + curR = rasterio.open(cur_file) + curD = curR.read() + print(f'{f}: {(curD == curR.meta["nodata"]).sum()}') + +def pp_point_urban_summaries(inD, urban_tiffs, out_file): + ''' summarize urbanization for point locations (inD) for each urban definition file (urban_tiffs) + ''' + for pFile in urban_tiffs: + if pFile.endswith(".tif"): + try: + rFile = rasterio.open(pFile) + if inD.crs != rFile.crs: + inD = inD.to_crs(rFile.crs) + geoms = [(row['geometry'].x, row['geometry'].y) for idx, row in inD.iterrows()] + urb_res = rFile.sample(geoms) + inD[os.path.basename(pFile).replace(".tif","")] = [x[0] for x in list(urb_res)] + except: + pass + pd.DataFrame(inD.drop(['geometry'], axis=1)).to_csv(out_file) + + +def point_urban_summaries(inD, pop_tiffs, out_file): + ''' summarize urbanization for point locations (inD) for each population file (pop_tiffs) + ''' + for pFile in pop_tiffs: + urb_file = pFile.replace(".tif", "_urban.tif") + hd_file = pFile.replace(".tif", "_urban_hd.tif") + # For each population file there is an urban file and a HD urban file + for curFile in [urb_file, hd_file]: + try: + inUrb = rasterio.open(curFile) + if inD.crs!= inUrb.crs: + inD = inD.to_crs(inUrb.crs) + geoms = [(row['geometry'].x, row['geometry'].y) for idx, row in inD.iterrows()] + urb_res = inUrb.sample(geoms) + inD[os.path.basename(curFile).replace(".tif","")] = [x[0] for x in list(urb_res)] + except: + pass + pd.DataFrame(inD.drop(['geometry'], axis=1)).to_csv(out_file) + +def run_country(iso3): + local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3) + +def run_zonal(iso3, output_folder, inG, pop_files, ea_file, pt_file): + ''' Summarize zonal statistics for urbanization numbers against polygons and points for both WB and PP urban calculations + ''' + pp_deleniations_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/AAPPC/Delineations" + + inD = inG.loc[inG['ISO3'] == iso3].copy() + inD['geometry'] = inD['geometry'].apply(lambda x: x.buffer(500)) + inD = inD.to_crs('epsg:4326') + + # Run zonal stats on WB stats using ea boundary + out_ea_zonal = os.path.join(output_folder, f"{iso3}_EA_WB_URBAN_1K.csv") + if not os.path.exists(out_ea_zonal) & os.path.exists(ea_file): + xx = helper.urban_country(iso3, output_folder, inD, pop_files, final_folder="FINAL_STANDARD_1KM", ghspop_suffix="1k") + zonal_ea = xx.pop_zonal_admin(gpd.read_file(ea_file)) + zonal_ea.to_csv(out_ea_zonal) + + out_ea_zonal = os.path.join(output_folder, f"{iso3}_EA_WB_URBAN_250.csv") + xx = helper.urban_country(iso3, output_folder, inD, pop_files, final_folder="FINAL_STANDARD", ghspop_suffix="") + zonal_ea = xx.pop_zonal_admin(gpd.read_file(ea_file)) + zonal_ea.to_csv(out_ea_zonal) + + # Run zonal stats on pp urban using ea boundary + out_ea_pp_zonal = os.path.join(output_folder, f"{iso3}_EA_PP_URBAN.csv") + if (os.path.exists(ea_file)): # & not os.path.exists(out_ea_pp_zonal): + pp_zonal_ea = calc_pp_urban(pp_deleniations_folder, pop_files[0][1], ea_file, output_folder, iso3) + if 'geometry' in pp_zonal_ea.columns: + pp_zonal_ea = pp_zonal_ea.drop(['geometry'], axis=1) + pp_zonal_ea.to_csv(out_ea_pp_zonal) + + wb_out_file = os.path.join(output_folder, f"{iso3}_HH_GPS_WB_URBAN.csv") + pp_out_file = os.path.join(output_folder, f"{iso3}_HH_GPS_PP_URBAN.csv") + if os.path.exists(pt_file) and not os.path.exists(wb_out_file): + cur_pt = gpd.read_file(pt_file) + all_tiffs = [] + base_folder = os.path.join(output_folder, "FINAL_STANDARD") + base_folder_1km = os.path.join(output_folder, "FINAL_STANDARD_1KM") + for file_defs in pop_files: + pFile = file_defs[1] + all_tiffs.append(os.path.join(base_folder, pFile)) + all_tiffs.append(os.path.join(base_folder_1km, pFile.replace(f"{iso3.lower()}", f"{iso3.lower()}1k"))) + point_urban_summaries(cur_pt, all_tiffs, wb_out_file) + + if os.path.exists(pt_file) and not os.path.exists(pp_out_file): + # Get list of urban tiffs from PP + urban_tiffs = [os.path.join(pp_deleniations_folder, x) for x in os.listdir(pp_deleniations_folder) if iso3.lower() in x] + pp_point_urban_summaries(cur_pt, urban_tiffs, pp_out_file) + + +EA_DEFS = { # Define ea files per iso3 + # iso3 : folder, polygon file, point file + "BFA": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/BurkinaFaso/","bfa_admbnda_adm3_igb_20200323.shp", "bfa.geojson"], + "TCD": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Chad", "tcd_a_admbnd_adm3_ocha.shp", "ChadFinal.geojson"], + "GIN": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Guinea/","gin_admbnda_adm3_ocha.shp", "GINFinal.geojson"], + "GNB": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Guinea Bissau/","gnb_admbnda_adm2_1m_salb_20210609.shp", "GNBFinal.geojson"], + "GAB": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Gabon", "CANTONS_region.shp", "gabon_gps.geojson"], + "LSO": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Lesotho/","lso_admbnda_adm2_FAO_MLGCA_2019.shp", "lesotho_list.geojson"], + "MWI": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Malawi", "mwi_admbnda_adm3_nso_20181016.shp", "checkedcoord_malawi.geojson"], + "MLI": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Mali", "mli_admbnda_adm3_1m_dnct_20190802.shp", "MaliFinal.geojson"], + "MAU": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Mauritania", "MAU_edit.shp", "mauritania.geojson"], + "NER": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Niger", "NER_adm03_feb2018.shp", "NigerFinal.geojson"], + "SEN": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Senegal/","sen_admbnda_adm3_1m_gov_ocha_20190426.shp", "senegal.geojson"], + "UGA": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Uganda/uganda_parishes_cleaned_attached", "uganda_parishes_cleaned_attached.shp", ""], + 'CIV': ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/CIV/", "civ_admbnda_adm1_cntig_ocha_itos_20180706.shp", "civ.geojson"], + 'AGO': ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Angola/", "bairros.shp", ""], + 'ETH': ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Ethiopia/", "Ethiopia_pti_admin3.shp", "HBS_GPS.geojson"], +} + +if __name__ == "__main__": + print("STARTING") + global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp" + global_bounds_adm2 = "/home/public/Data/GLOBAL/ADMIN/Admin2_Polys.shp" + global_ghspop = "/home/public/Data/GLOBAL/Population/GHS/250/GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0.tif" + global_ghspop_1k = "/home/public/Data/GLOBAL/Population/GHS/GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0.tif" + worldPop_2015 = "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2015/worldPop_2015.vrt" + constrained_WP_folder = "/home/public/Data/GLOBAL/Population/RF_SSA_2015-2020" + out_base = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data" + + inG = gpd.read_file(global_bounds) + inG2 = gpd.read_file(global_bounds_adm2) + + runSmall = True + runLarge = True + + focal_countries = inG.loc[inG['Region'] == 'Sub-Saharan Africa'].sort_values(['ISO3'])['ISO3'].values + nCores = min(len(focal_countries), round(multiprocessing.cpu_count() * .8)) + all_commands = [] + zonal_commands = [] + for iso3 in EA_DEFS.keys(): #focal_countries: #: #['MAR','DZA','TUN','LBY'] + tPrint(iso3) + try: + cur_def = EA_DEFS[iso3] + ea_file = os.path.join(cur_def[0], cur_def[1]) + pt_file = os.path.join(cur_def[0], cur_def[2]) + except: + ea_file = '' + pt_file = '' + + output_folder = os.path.join(out_base, "%s_URBAN_DATA_new_naming" % iso3) + pop_files = [[worldPop_2015, f'{iso3.lower()}_upo15.tif']] + # Identify the constrained WorldPop layer + c_WP_15 = f'{constrained_WP_folder}/{iso3}/ppp_{iso3}_const_2015.tif' + c_WP_20 = f'{constrained_WP_folder}/{iso3}/ppp_{iso3}_const_2020.tif' + if os.path.exists(c_WP_15): + pop_files.append([c_WP_15, f'{iso3.lower()}_cpo15.tif']) + else: + print(f'***** Could not locate constrained WorldPop 2015 for {iso3}') + if os.path.exists(c_WP_20): + pop_files.append([c_WP_20, f'{iso3.lower()}_cpo20.tif']) + else: + print(f'***** Could not locate constrained WorldPop 2020 for {iso3}') + + ''' + try: + run_zonal(iso3, output_folder, inG, pop_files, ea_file, pt_file) + #calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder, km=True, small=True) + except: + print(f"Error with {iso3}") + ''' + cur_args = [iso3, inG, inG2, pop_files, ea_file, output_folder] + all_commands.append(cur_args) + + pop_files.append([constrained_WP_folder, f'{iso3.lower()}_gpo.tif']) + zonal_args = [iso3, output_folder, inG, pop_files, ea_file, pt_file] + zonal_commands.append(zonal_args) + + with multiprocessing.Pool(nCores) as pool: + #pool.starmap(calculate_urban, all_commands) + pool.starmap(run_zonal, zonal_commands) + + + + \ No newline at end of file diff --git a/Notebooks/Tutorials/LEI_Example.ipynb b/Notebooks/Tutorials/LEI_Example.ipynb index 20170ae..21b4e8f 100644 --- a/Notebooks/Tutorials/LEI_Example.ipynb +++ b/Notebooks/Tutorials/LEI_Example.ipynb @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -77,90 +77,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
geometryoldtotalLEI
0POLYGON ((132510 701340, 132510 701280, 132570...03910.000000
1POLYGON ((133710 701070, 133710 701040, 133740...13560.002809
2POLYGON ((132750 701010, 132750 700950, 132780...13760.002660
3POLYGON ((133380 700890, 133380 700830, 133410...03760.000000
4POLYGON ((132810 700800, 132810 700770, 132840...13560.002809
\n", - "
" - ], - "text/plain": [ - " geometry old total LEI\n", - "0 POLYGON ((132510 701340, 132510 701280, 132570... 0 391 0.000000\n", - "1 POLYGON ((133710 701070, 133710 701040, 133740... 1 356 0.002809\n", - "2 POLYGON ((132750 701010, 132750 700950, 132780... 1 376 0.002660\n", - "3 POLYGON ((133380 700890, 133380 700830, 133410... 0 376 0.000000\n", - "4 POLYGON ((132810 700800, 132810 700770, 132840... 1 356 0.002809" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# This calculates the change from 1990 and 2000\n", "lei_raw = lei.calculate_LEI(input_ghsl, old_list = [5,6], new_list=[4])\n", @@ -171,90 +90,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
geometryoldtotalLEI
0POLYGON ((132540 701340, 132540 701310, 132570...33560.008427
1POLYGON ((133530 701040, 133530 701010, 133560...63560.016854
2POLYGON ((133380 701010, 133380 700980, 133410...63560.016854
3POLYGON ((133560 701010, 133560 700980, 133590...63910.015345
4POLYGON ((133350 700980, 133350 700950, 133380...63560.016854
\n", - "
" - ], - "text/plain": [ - " geometry old total LEI\n", - "0 POLYGON ((132540 701340, 132540 701310, 132570... 3 356 0.008427\n", - "1 POLYGON ((133530 701040, 133530 701010, 133560... 6 356 0.016854\n", - "2 POLYGON ((133380 701010, 133380 700980, 133410... 6 356 0.016854\n", - "3 POLYGON ((133560 701010, 133560 700980, 133590... 6 391 0.015345\n", - "4 POLYGON ((133350 700980, 133350 700950, 133380... 6 356 0.016854" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# This calculates the change from 2000 and 2014\n", "lei_raw = lei.calculate_LEI(input_ghsl, old_list = [4,5,6], new_list=[3])\n", @@ -265,24 +103,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "class\n", - "Expansion 26.7696\n", - "Infill 1.7469\n", - "Leapfrog 2.0295\n", - "Name: area, dtype: float64" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "importlib.reload(lei)\n", "#Calculate summaries of lei\n", @@ -291,24 +114,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "class\n", - "Expansion 3.8331\n", - "Infill 0.0279\n", - "Leapfrog 0.8982\n", - "Name: area, dtype: float64" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#Calculate summaries of lei\n", "lei.summarize_LEI(lei_00_14, leap_val=0.05, exp_val=0.75)/1000000" @@ -316,7 +124,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -334,7 +142,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -366,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -445,7 +253,7 @@ "4 POLYGON ((132510 701370, 132510 701340, 132540... 6 356 0.016854" ] }, - "execution_count": 11, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -460,7 +268,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -539,7 +347,7 @@ "4 POLYGON ((132540 701370, 132540 701310, 132570... 10 457 0.021882" ] }, - "execution_count": 12, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -554,52 +362,52 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "class\n", - "Expansion 9.8775\n", - "Infill 2.0466\n", - "Leapfrog 0.3978\n", + "Expansion 6.6150\n", + "Infill 5.6961\n", + "Leapfrog 0.0108\n", "Name: area, dtype: float64" ] }, - "execution_count": 13, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Calculate summaries of lei\n", - "lei.summarize_LEI(lei_90_00, leap_val=0.05, exp_val=0.75)/1000000" + "lei.summarize_LEI(lei_90_00, leap_val=0.001, exp_val=0.5)/1000000" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "class\n", - "Expansion 35.9073\n", - "Infill 5.4675\n", - "Leapfrog 1.0584\n", + "Expansion 23.0148\n", + "Infill 19.2915\n", + "Leapfrog 0.1269\n", "Name: area, dtype: float64" ] }, - "execution_count": 14, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Calculate summaries of lei\n", - "lei.summarize_LEI(lei_00_14, leap_val=0.05, exp_val=0.75)/1000000" + "lei.summarize_LEI(lei_00_14, leap_val=0.001, exp_val=0.5)/1000000" ] }, { @@ -612,6 +420,97 @@ "lei_90_00.to_csv(os.path.join(input_folder, \"WSF_LEI_90_00.csv\"))\n", "lei_00_14.to_csv(os.path.join(input_folder, \"WSF_LEI_00_14.csv\"))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "zip -r BGD_1KM.zip BGD_URBAN_DATA_new_naming/FINAL_STANDARD_1KM\n", + "zip -r BGD_STANDARD.zip BGD_URBAN_DATA_new_naming/FINAL_STANDARD\n", + "zip -r TZA_1KM.zip TZA_URBAN_DATA_new_naming/FINAL_STANDARD_1KM\n", + "zip -r TZA_STANDARD.zip TZA_URBAN_DATA_new_naming/FINAL_STANDARD\n", + "zip -r GHA_1KM.zip GHA_URBAN_DATA_new_naming/FINAL_STANDARD_1KM\n", + "zip -r GHA_STANDARD.zip GHA_URBAN_DATA_new_naming/FINAL_STANDARD\n", + "zip -r ETH_1KM.zip ETH_URBAN_DATA_new_naming/FINAL_STANDARD_1KM\n", + "zip -r ETH_STANDARD.zip ETH_URBAN_DATA_new_naming/FINAL_STANDARD\n", + "zip -r EGY_1KM.zip EGY_URBAN_DATA_new_naming/FINAL_STANDARD_1KM\n", + "zip -r EGY_STANDARD.zip EGY_URBAN_DATA_new_naming/FINAL_STANDARD\n", + "zip -r COL_1KM.zip COL_URBAN_DATA_new_naming/FINAL_STANDARD_1KM\n", + "zip -r COL_STANDARD.zip COL_URBAN_DATA_new_naming/FINAL_STANDARD\n", + "zip -r AGO_1KM.zip AGO_URBAN_DATA_new_naming/FINAL_STANDARD_1KM\n", + "zip -r AGO_STANDARD.zip AGO_URBAN_DATA_new_naming/FINAL_STANDARD\n", + "zip -r VNM_1KM.zip VNM_URBAN_DATA_new_naming/FINAL_STANDARD_1KM\n", + "zip -r VNM_STANDARD.zip VNM_URBAN_DATA_new_naming/FINAL_STANDARD\n" + ] + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'VNM_STANDARD.zip'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"%s_%s.zip\" % (cur_dir.split(\"/\")[-2].split(\"_\")[0], cur_dir.split(\"_\")[-1])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'STANDARD'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cur_dir.split(\"_\")[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/Notebooks/URB_DECAT_ExtractByISO3.ipynb b/Notebooks/URB_DECAT_ExtractByISO3.ipynb new file mode 100644 index 0000000..0edc9c1 --- /dev/null +++ b/Notebooks/URB_DECAT_ExtractByISO3.ipynb @@ -0,0 +1,261 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Extract urban areas on JNB\n", + "\n", + "This code provides an example of how to calculate urban areas from gridded population data. However, it is designed explicitly to run on the GOST Jupyter notebook server, so the paths may not work." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/wb411133/.conda/envs/geog/lib/python3.7/site-packages/geopandas/_compat.py:88: UserWarning: The Shapely GEOS version (3.7.1-CAPI-1.11.1 0) is incompatible with the GEOS version PyGEOS was compiled with (3.9.0-CAPI-1.16.2). Conversions between both will be slow.\n", + " shapely_geos_version, geos_capi_version_string\n" + ] + } + ], + "source": [ + "import sys, os, importlib, json\n", + "import rasterio, geojson\n", + "\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "\n", + "from shapely.geometry import shape, Polygon\n", + "\n", + "#Import raster helpers\n", + "sys.path.append(\"../../gostrocks/src\")\n", + "import GOSTRocks.rasterMisc as rMisc\n", + "from GOSTRocks.misc import tPrint\n", + "\n", + "#Import GOST urban functions\n", + "sys.path.append(\"../\")\n", + "import src.UrbanRaster as urban\n", + "import src.urban_helper as helper\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "iso3 = \"LKA\"\n", + "out_folder = \"/home/wb411133/temp/%s_URBAN\" % iso3\n", + "if not os.path.exists(out_folder):\n", + " os.makedirs(out_folder)\n", + "\n", + "urban_ext = os.path.join(out_folder, \"URBAN_Extents.shp\")\n", + "hd_urban_ext = os.path.join(out_folder, \"HD_URBAN_Extents.shp\")\n", + "pop_file = os.path.join(out_folder, \"population_2020.tif\")\n", + " \n", + "global_bounds = \"/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp\"\n", + "global_pop = \"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/ppp_2020_1km_Aggregated.tif\"\n", + "\n", + "inG = gpd.read_file(global_bounds)\n", + "selG = inG.loc[inG['ISO3'] == iso3]\n", + "inPop_raster = rasterio.open(global_pop)\n", + "\n", + "if selG.crs != inPop_raster.crs:\n", + " selG = selG.to_crs(inPop_raster.crs)\n", + " \n", + "if not os.path.exists(pop_file):\n", + " rMisc.clipRaster(inPop_raster, selG, pop_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "12:31:20\t: Read in urban data\n", + "12:31:20\t: Creating Shape 0\n", + "12:31:20\t: Creating Shape 1000\n" + ] + } + ], + "source": [ + "curR = rasterio.open(pop_file)\n", + "urban_calculator = urban.urbanGriddedPop(curR)\n", + "urban_extents = urban_calculator.calculateUrban(densVal=300, totalPopThresh=5000, \n", + " smooth=False, queen=False,\n", + " verbose=True)\n", + "urban_extents.to_file(urban_ext)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "12:31:21\t: Read in urban data\n", + "12:31:21\t: Creating Shape 0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "../src/UrbanRaster.py:290: UserWarning: Geometry is in a geographic CRS. Results from 'buffer' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.\n", + "\n", + " xxGeom['geometry '] = xxGeom.buffer((popRaster.res[0] / 2))\n" + ] + } + ], + "source": [ + "urban_extents = urban_calculator.calculateUrban(densVal=1500, totalPopThresh=50000, \n", + " smooth=True, queen=True,\n", + " verbose=True)\n", + "urban_extents.to_file(hd_urban_ext)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Reverse Geocode the urban extents" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import reverse_geocode" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDPop
COUNTRY
India22893.211364e+04
Sri Lanka1188541.608292e+07
\n", + "
" + ], + "text/plain": [ + " ID Pop\n", + "COUNTRY \n", + "India 2289 3.211364e+04\n", + "Sri Lanka 118854 1.608292e+07" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "extents = gpd.read_file(urban_ext)\n", + "\n", + "extents['NAME'] = ''\n", + "extents['COUNTRY'] = ''\n", + "for idx, row in extents.iterrows():\n", + " cur_city = reverse_geocode.search([(row['geometry'].centroid.y, row['geometry'].centroid.x)])\n", + " extents.loc[idx, 'COUNTRY'] = cur_city[0]['country']\n", + " extents.loc[idx, 'NAME'] = cur_city[0]['city']\n", + " #print(idx)\n", + "extents.groupby('COUNTRY').sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "extents.to_file(urban_ext)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "extents.to_file(hd_urban_ext)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (geog)", + "language": "python", + "name": "geog" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/GOST_Urban/LEI.py b/src/GOST_Urban/LEI.py new file mode 100755 index 0000000..6ad5b2b --- /dev/null +++ b/src/GOST_Urban/LEI.py @@ -0,0 +1,167 @@ +import os, sys, logging, multiprocessing + +import geojson, rasterio +import rasterio.features + +import pandas as pd +import numpy as np + +from GOSTRocks.misc import tPrint +from shapely.geometry import shape, GeometryCollection +from shapely.wkt import loads + +def mp_lei(curRxx, transformxx, idx_xx, old_list=[4,5,6], new_list=[3], buffer_dist=300): + ''' calculate and summarize LEI for curRxx, designed for use in multiprocessing function ''' + curRes = calculate_LEI(curRxx, transform=transformxx, old_list=old_list, new_list=new_list, buffer_dist=buffer_dist) + xx = pd.DataFrame(curRes, columns=['geometry', 'old', 'total']) + xx['LEI'] = xx['old'] / xx['total'] + final = summarize_LEI(xx) + final['idx'] = idx_xx + return(final) + +def lei_from_feature(inD, inR, old_list = [4,5,6], new_list=[3], buffer_dist=300, transform='', nCores=0, measure_crs=None, idx_col=None): + ''' Calculate LEI for each feature in inD, leveraging multi-processing, based on the built area in inR + + INPUT + inD [geopandas] + inR [rasterio] + [optional] nCores [int] - number of cores to use in multi-processing + [optional] measure_crs [string] - string to convert all data to a CRS where distance and area measurements don't suck ie - "ESRI:54009" + ... see calculate_LEI for remaining arguments + ''' + if inD.crs != inR.crs: + inD = inD.to_crs(inR.crs) + + if not measure_crs is None: + measureD = inD.to_crs(measure_crs) + + lei_results = {} + # For grid cells, extract the GHSL and calculate + in_vals = [] + tPrint('***** Preparing values for multiprocessing') + for idx, row in inD.iterrows(): + if idx % 100 == 0: + tPrint(f'{idx} of {inD.shape[0]}: {len(in_vals)}') + ul = inR.index(*row['geometry'].bounds[0:2]) + lr = inR.index(*row['geometry'].bounds[2:4]) + # read the subset of the data into a numpy array + window = ((float(lr[0]), float(ul[0]+1)), (float(ul[1]), float(lr[1]+1))) + curR = inR.read(1, window=window) + if (np.isin(curR, old_list).sum() > 2) & (np.isin(curR, new_list).sum() > 2): + if measure_crs is None: + transform = rasterio.transform.from_bounds(*row['geometry'].bounds, curR.shape[0], curR.shape[1]) + else: + transform = rasterio.transform.from_bounds(*measureD.loc[idx,'geometry'].bounds, curR.shape[0], curR.shape[1]) + cur_idx = idx + if idx_col: + cur_idx = row[idx_col] + in_vals.append([curR, transform, cur_idx, old_list, new_list, buffer_dist]) + + if nCores == 0: + nCores = multiprocessing.cpu_count() + tPrint('***** starting multiprocessing') + with multiprocessing.Pool(nCores) as pool: + res = pool.starmap(mp_lei, in_vals) + + res = pd.DataFrame(res) + res = res.reset_index() + res.index = res['idx'] + #res.drop(['idx'], axis=1, inplace=True) + return(res) + +def calculate_LEI(inputGHSL, old_list = [4,5,6], new_list=[3], buffer_dist=300, transform=''): + ''' Generate urban, feature-level LEI; generate area calculations of new and old extents + + INPUT + inputGHSL [string or rasterio object] - path to a geotiff or a rasterio object, or a numpy array + [optional] old_list [list of numbers] - values in built area raster to consider old urban. In GHSL 4, 5, 6 indicates all built until 2000 + [optional] new_list [int] - value in built area raster to consider new urban. In GHSL 3 indicates 2014 + + RETURNS + [array] - individual new built areas with LEI results. Each item is a single new built feature with three columns: + 1. geometry of the new built area feature + 2. number of pixels in new built area donut from old built area + 3. area of new built area buffer + + EXAMPLE + # This calculates the change from 1990 and 2000 + lei_raw = calculate_LEI(input_ghsl, old_list = [5,6], new_list=[4]) + lei_90_00 = pd.DataFrame(lei_raw, columns=['geometry', 'old', 'total']) + lei_90_00['LEI'] = lei_90_00['old'] / lei_90_00['total'] + lei_90_00.head() + ''' + if isinstance(inputGHSL, str): + inRaster = rasterio.open(inputGHSL).read() + inR = inRaster.read() + transform = inR.transform + elif isinstance(inputGHSL, rasterio.DatasetReader): + inR = inputGHSL.read() + else: + inR = inputGHSL + if len(inR.shape) == 2: + inR = inR.reshape(1, inR.shape[0], inR.shape[1]) + newR = (np.isin(inR, new_list)).astype('int') + oldR = (np.isin(inR, old_list)).astype('int') + allVals = [] + for geom, value in rasterio.features.shapes(newR.astype('uint8'), transform=transform): + if value == 1: + # Convert the geom to a shape and buffer by 300 metres + curShape = shape(geom) + bufferArea = curShape.buffer(buffer_dist) + #Clip out the original shape to leave just the donut + try: + donutArea = bufferArea.difference(curShape) + except: + bufferArea = bufferArea.buffer(0) + curShape = curShape.buffer(0) + donutArea = bufferArea.difference(curShape) + # Rasterize donut shape + shapes = [(donutArea, 1)] + burned = rasterio.features.rasterize(shapes=shapes, fill=0, + out_shape=(oldR.shape[1], oldR.shape[2]), + transform=transform) + # Multiply the new raster by the old urban data to get the total + # amount of old area in the buffer around the new urban area + oldArea = (oldR[0,:,:] * burned).sum() + totalArea = burned.sum() + allVals.append([curShape, oldArea, totalArea]) + + return(allVals) + +def summarize_LEI(in_file, leap_val=0.05, exp_val=0.9): + ''' Summarize the LEI csv files produced by calculate_LEI + + in_file [string path or datafrane]: generated from the calculate_LEI above + leap_val [float]: LEI value below which areas are considered to be leapfrog + exp_val [float]: LEI value above which areas are considered to be infill + + returns + [pandas groupby row] + + example + + for res_file in all_results_files: + res = summarize_LEI(res_file) + baseName = os.path.basename(os.path.dirname(res_file)) + summarized_results[baseName] = res + + all_results = pd.DataFrame(summarized_results).transpose() + ''' + if isinstance(in_file, str): + res = pd.read_csv(in_file) + res['area'] = res['geometry'].apply(lambda x: loads(x).area) + else: + res = in_file + if not 'area' in res.columns: + res['area'] = res['geometry'].apply(lambda x: x.area) + + def calculate_LEI(val, leap_val, exp_val): + if val <= leap_val: + return('Leapfrog') + elif val < exp_val: + return('Expansion') + else: + return('Infill') + res['class'] = res['LEI'].apply(lambda x: calculate_LEI(x, leap_val, exp_val)) + xx = res.groupby('class') + return(xx.sum()['area']) diff --git a/src/UrbanRaster.py b/src/GOST_Urban/UrbanRaster.py similarity index 100% rename from src/UrbanRaster.py rename to src/GOST_Urban/UrbanRaster.py diff --git a/src/WSF/__init__.py b/src/GOST_Urban/WSF/__init__.py similarity index 100% rename from src/WSF/__init__.py rename to src/GOST_Urban/WSF/__init__.py diff --git a/src/WSF/wsfdata.py b/src/GOST_Urban/WSF/wsfdata.py similarity index 100% rename from src/WSF/wsfdata.py rename to src/GOST_Urban/WSF/wsfdata.py diff --git a/src/__init__.py b/src/GOST_Urban/__init__.py similarity index 100% rename from src/__init__.py rename to src/GOST_Urban/__init__.py diff --git a/src/urban_helper.py b/src/GOST_Urban/urban_helper.py similarity index 87% rename from src/urban_helper.py rename to src/GOST_Urban/urban_helper.py index d9f8464..58a937e 100755 --- a/src/urban_helper.py +++ b/src/GOST_Urban/urban_helper.py @@ -13,10 +13,7 @@ import numpy as np sys.path.append("../") -import src.UrbanRaster as urban - -#Import raster helpers -sys.path.append("../../gostrocks/src") +import GOST_Urban.UrbanRaster as urban import GOSTRocks.rasterMisc as rMisc from GOSTRocks.misc import tPrint @@ -25,6 +22,11 @@ class summarize_population(object): ''' summarize population and urban populations for defined admin regions ''' def __init__(self, pop_layer, admin_layer, urban_layer='', hd_urban_layer='', temp_folder=''): + ''' Summarize population into urban and rural based on GOST_Urban.UrbanRaster.calculateUrban + + INPUT + pop_layer [string] - path + ''' self.pop_layer = pop_layer self.urban_layer = urban_layer @@ -66,7 +68,7 @@ def calculate_zonal(self, out_name=''): res = rMisc.zonalStats(inA, self.in_pop, minVal=0) final = pd.DataFrame(res, columns=["TOTALPOP_%s_%s" % (os.path.basename(self.pop_layer), x) for x in ['SUM', 'MIN', 'MAX', 'MEAN']]) - for lyr in [self.urban_layer, self.urban_hd_layer]: + for lyr in [self.urban_layer, self.urban_hd_layer]: name = os.path.basename(lyr) in_urban = rasterio.open(lyr) inU = in_urban.read() @@ -85,7 +87,7 @@ def calculate_zonal(self, out_name=''): return(final) class urban_country(object): - ''' + ''' Extract and summarize urbanization in selected country, based on novel urbanization work of Mark Roberts and Shohei Nakamura ''' def __init__(self, iso3, output_folder, country_bounds, pop_files, final_folder = "", ghspop_suffix=""): @@ -121,6 +123,7 @@ def __init__(self, iso3, output_folder, country_bounds, pop_files, final_folder self.dem_file = os.path.join(output_folder, "%s_ele.tif" % iso3.lower()) self.slope_file = os.path.join(output_folder, "%s_slo.tif" % iso3.lower()) + self.desert_file = os.path.join(output_folder, "%s_des.tif" % iso3.lower()) self.lc_file = os.path.join(output_folder, "%s_lc.tif" % iso3.lower()) self.lc_file_h20 = os.path.join(output_folder, "%s_wat_lc.tif" % iso3.lower()) self.ghsl_h20 = os.path.join(output_folder, "%s_wat.tif" % iso3.lower()) @@ -136,14 +139,18 @@ def __init__(self, iso3, output_folder, country_bounds, pop_files, final_folder out_pop_file = os.path.join(output_folder, fileDef[1]) self.pop_files.append(out_pop_file) if not os.path.exists(out_pop_file): - shutil.copy(fileDef[0], out_pop_file) - if ghspop_suffix == '1k': - self.pop_files.append(self.ghspop1k_file) - shutil.copy(self.ghspop1k_file, os.path.join(self.final_folder, os.path.basename(self.ghspop1k_file))) - else: - self.pop_files.append(self.ghspop_file) - shutil.copy(self.ghspop_file, os.path.join(self.final_folder, os.path.basename(self.ghspop_file))) + tPrint(f'Clipping {fileDef[0]}') + rMisc.clipRaster(rasterio.open(fileDef[0]), country_bounds, out_pop_file) + ''' + if ghspop_suffix == '1k': + if not self.ghspop1k_file in self.pop_files: + self.pop_files.append(self.ghspop1k_file) + else: + ''' + if not self.ghspop_file in self.pop_files: + self.pop_files.append(self.ghspop_file) + self.pop_files = list(set(self.pop_files)) # Write admin shapefile to output file self.inD = country_bounds if not os.path.exists(self.admin_shp): @@ -177,6 +184,20 @@ def process_dem(self, global_dem=''): def extract_layers(self, global_landcover, global_ghspop, global_ghspop1k, global_ghbuilt, global_ghsl, global_smod): ''' extract global layers for current country ''' + # Extract desert from globcover + if not os.path.exists(self.desert_file): + tPrint("Extracting desert") + if not os.path.exists(self.lc_file): + rMisc.clipRaster(rasterio.open(global_landcover), self.inD, self.lc_file) + in_lc = rasterio.open(self.lc_file) + inL = in_lc.read() + lcmeta = in_lc.meta.copy() + tempL = (inL == 200).astype(lcmeta['dtype']) + lcmeta.update(nodata=255) + with rasterio.open(self.desert_file, 'w', **lcmeta) as out: + out.write(tempL) + os.remove(self.lc_file) + # Extract water from globcover if not os.path.exists(self.lc_file_h20): tPrint("Extracting water") @@ -195,7 +216,7 @@ def extract_layers(self, global_landcover, global_ghspop, global_ghspop1k, globa if not os.path.exists(self.ghsl_h20): tPrint("Extracting water from GHSL") inR = rasterio.open(global_ghsl) - if inR.crs != self.inD.crs: + if inR.crs.to_epsg() != self.inD.crs.to_epsg(): tempD = self.inD.to_crs(inR.crs) else: tempD = inD @@ -204,7 +225,7 @@ def extract_layers(self, global_landcover, global_ghspop, global_ghspop1k, globa # read the subset of the data into a numpy array window = ((float(lr[0]), float(ul[0]+1)), (float(ul[1]), float(lr[1]+1))) data = inR.read(1, window=window, masked = False) - data = data == 2 + data = data == 1 b = tempD.total_bounds new_transform = rasterio.transform.from_bounds(b[0], b[1], b[2], b[3], data.shape[1], data.shape[0]) meta = inR.meta.copy() @@ -221,7 +242,7 @@ def extract_layers(self, global_landcover, global_ghspop, global_ghspop1k, globa #Extract GHS-Pop-1k if not os.path.exists(self.ghspop1k_file): - tPrint("Extracting GHS-POP") + tPrint("Extracting GHS-POP 1K: %s" % self.ghspop1k_file) rMisc.clipRaster(rasterio.open(global_ghspop1k), self.inD, self.ghspop1k_file) #Extract GHS-Built @@ -262,7 +283,6 @@ def calculate_urban(self, urb_val=300, hd_urb_val=1500): # Convert density values for urbanization from 1km resolution to current resolution in_raster = rasterio.open(final_pop) total_ratio = (in_raster.res[0] * in_raster.res[1]) / 1000000 - print(final_pop) if not os.path.exists(final_urban): urban_shp = urbanR.calculateUrban(densVal= (urb_val * total_ratio), totalPopThresh=5000, raster=final_urban) if not os.path.exists(final_urban_hd): @@ -273,17 +293,18 @@ def pop_zonal_admin(self, admin_layer): :param: - admin_layer ''' - for p_file in self.pop_files: - pop_file = os.path.join(self.final_folder, os.path.basename(p_file).replace(self.iso3.lower(), "%s%s" % (self.iso3.lower(), self.suffix))) + for p_file in self.pop_files: + pop_file = os.path.join(self.final_folder, os.path.basename(p_file).replace(self.iso3.lower(), "%s%s" % (self.iso3.lower(), self.suffix))) if "1k1k" in pop_file: pop_file = pop_file.replace("1k1k", "1k") yy = summarize_population(pop_file, admin_layer) if yy.check_inputs(): res = yy.calculate_zonal(out_name='') + out_file = f"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/LSO_{os.path.basename(p_file)}.csv" try: final = final.join(res) except: - final = res + final = res else: print("Error summarizing population for %s" % pop_file) admin_layer = admin_layer.reset_index() @@ -305,30 +326,33 @@ def compare_pop_rasters(self, verbose=True): print(f"{os.path.basename(pFile)}: {inD.sum()}") return(all_res) - def standardize_rasters(self): + def standardize_rasters(self, include_ghsl_h20 = True): ''' ''' ghs_R = rasterio.open(self.ghspop_file) + pFile = self.ghspop_file if self.suffix == "1k": ghs_R = rasterio.open(self.ghspop1k_file) + pFile = self.ghspop1k_file file_defs = [ #file, type, scale values [self.admin_file,'C',False], - [self.ghspop_file, 'N', True], + [self.desert_file, 'C', False], [self.lc_file_h20, 'C', False], - [self.ghsl_h20, 'C', False], - [self.ghsl_h20, 'N', False, "%s_wat.tif" % self.iso3.lower()], [self.slope_file, 'N', False], [self.dem_file, 'N', False], [self.ghssmod_file, 'N', False], [self.ghsbuilt_file, 'N', False], ] + if include_ghsl_h20: + file_defs.append([self.ghsl_h20, 'C', False]) + file_defs.append([self.ghsl_h20, 'N', False, os.path.join(self.final_folder, "%s%s_wat_p.tif" % (self.iso3.lower(), self.suffix))]) + for cFile in self.pop_files: file_defs.append([cFile, 'N', True]) for file_def in file_defs: - print(file_def[0]) try: out_file = file_def[3] except: @@ -353,6 +377,7 @@ def standardize_rasters(self): out_array[out_array == ghs_R.meta['nodata']] = 0. # scale and project file to GHS pop if defined so if (file_def[0] == self.admin_file): + adminA = out_file in_a = out_array in_a_mask = in_a == 0 @@ -389,6 +414,19 @@ def standardize_rasters(self): out_meta = ghs_R.meta.copy() with rasterio.open(out_no_data_file, 'w', **out_meta) as outR: outR.write(out_array) + + #Apply admin mask to population file + gpo1R = rasterio.open(pFile) + admR = rasterio.open(adminA) + + gpo1D = gpo1R.read() + maskD = admR.read() + + gpo1D[gpo1D == gpo1R.meta['nodata']] = 0 + gpo1D[maskD == admR.meta['nodata']] = gpo1R.meta['nodata'] + out_file = os.path.join(self.final_folder, os.path.basename(pFile)) + with rasterio.open(out_file, 'w',**gpo1R.meta) as outR: + outR.write(gpo1D) def evaluateOutput(self, admin_stats, commune_stats): '''