From a49d9bdceb17c55dd47e592a45f5c3766e3f9731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janne=20M=C3=A4yr=C3=A4?= Date: Wed, 28 Feb 2024 12:00:38 +0200 Subject: [PATCH 1/2] improve crs handling --- nbs/10_data.tabular.ipynb | 8 +++- nbs/12_data.tiling.ipynb | 48 +++++++++++--------- nbs/example_data/R70C21.dbf | Bin 2375 -> 2375 bytes nbs/example_data/tiles/coco_norm.json | 2 +- nbs/example_data/tiles/coco_rot.json | 2 +- nbs/example_data/tiles/vectors.gpkg | Bin 507904 -> 507904 bytes nbs/example_data/tiles_partial/vectors.gpkg | Bin 495616 -> 495616 bytes 7 files changed, 35 insertions(+), 25 deletions(-) diff --git a/nbs/10_data.tabular.ipynb b/nbs/10_data.tabular.ipynb index 4dd0ce9..4ce1709 100644 --- a/nbs/10_data.tabular.ipynb +++ b/nbs/10_data.tabular.ipynb @@ -277,7 +277,9 @@ " gdf = gpd.read_file(sampling_locations, layer=gpkg_layer)\n", "\n", " with rio.open(input_raster) as src:\n", - " assert str(gdf.crs) == str(src.crs), \"Sampling locations and input raster have different crs\"\n", + " if src.gcps[1]: in_crs = src.gcps[1]\n", + " else: in_crs = src.crs\n", + " gdf = gdf.to_crs(in_crs)\n", " coords = [(x,y) for x,y in zip(gdf.geometry.x, gdf.geometry.y)]\n", " values = np.array([p for p in src.sample(coords)])\n", " prof = src.profile\n", @@ -793,7 +795,9 @@ " \n", " gdf = gpd.read_file(sampling_locations, layer=gpkg_layer)\n", " with rio.open(input_raster) as src:\n", - " assert str(gdf.crs) == str(src.crs), \"Sampling locations and input raster have different crs\"\n", + " if src.gcps[1]: in_crs = src.gcps[1]\n", + " else: in_crs = src.crs\n", + " gdf = gdf.to_crs(in_crs)\n", " n_bands = src.count\n", " prof = src.profile\n", " zstats = []\n", diff --git a/nbs/12_data.tiling.ipynb b/nbs/12_data.tiling.ipynb index c966121..4173fc9 100644 --- a/nbs/12_data.tiling.ipynb +++ b/nbs/12_data.tiling.ipynb @@ -234,13 +234,15 @@ " window = rio_windows.Window.from_slices((dy, dy+self.gridsize_y),\n", " (dx, dx+self.gridsize_x))\n", " prof = src.profile.copy()\n", - " \n", + " if src.gcps[1]: in_crs = src.gcps[1]\n", + " else: in_crs = src.crs\n", " prof.update(\n", " height=window.height,\n", " width=window.width,\n", " transform= rio_windows.transform(window, src.transform),\n", " compress='lzw',\n", - " predictor=2\n", + " predictor=2,\n", + " crs=in_crs\n", " )\n", " fname = f'R{iy}C{ix}'\n", " data = src.read(window=window)\n", @@ -252,7 +254,8 @@ " dest.write(data)\n", " polys.append(box(*dest.bounds))\n", " names.append(fname)\n", - " self.grid = gpd.GeoDataFrame({'cell': names, 'geometry':polys}, crs=src.crs) \n", + "\n", + " self.grid = gpd.GeoDataFrame({'cell': names, 'geometry':polys}, crs=in_crs) \n", " return\n", " \n", " def tile_vector(self, path_to_vector:Path|str, min_area_pct:float=0.0, gpkg_layer:str=None, output_format:str='geojson') -> None:\n", @@ -280,7 +283,7 @@ " )\n", " \n", " vector = gpd.read_file(path_to_vector, layer=gpkg_layer)\n", - " \n", + " vector = vector.to_crs(self.grid.crs)\n", " sindex = vector.sindex\n", " for row in tqdm(self.grid.itertuples()):\n", " possible_matches_index = list(sindex.intersection(row.geometry.bounds))\n", @@ -322,12 +325,13 @@ " if not os.path.exists(self.rasterized_vector_path): os.makedirs(self.rasterized_vector_path)\n", "\n", " vector = gpd.read_file(path_to_vector)\n", + " vector = vector.to_crs(self.grid.crs)\n", " le = LabelEncoder()\n", " vector['label'] = le.fit_transform(vector[column].values) + 1 # We want the labels to start from 1 as 0 is background most of the time\n", " with open(self.outpath/'label_map.txt', 'w') as f:\n", " for c, i in zip(le.classes_, le.transform(le.classes_)):\n", " f.write(f'{c}: {i+1}\\n')\n", - " for r in tqdm(os.listdir(self.raster_path)):\n", + " for r in tqdm([f for f in os.listdir(self.raster_path) if f.endswith('tif')]):\n", " with rio.open(self.raster_path/r) as src:\n", " out_meta = src.meta\n", " out_meta.update({'count':1})\n", @@ -415,7 +419,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d8375a086cab444694f9820081d1cc51", + "model_id": "51eaf1ee31ff4becaca80ee5acd7a233", "version_major": 2, "version_minor": 0 }, @@ -441,7 +445,7 @@ "text/markdown": [ "---\n", "\n", - "[source](https://github.com/mayrajeo/geo2ml/blob/main/geo2ml/data/tiling.py#L111){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/mayrajeo/geo2ml/blob/main/geo2ml/data/tiling.py#L112){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Tiler.tile_vector\n", "\n", @@ -455,7 +459,7 @@ "text/plain": [ "---\n", "\n", - "[source](https://github.com/mayrajeo/geo2ml/blob/main/geo2ml/data/tiling.py#L111){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/mayrajeo/geo2ml/blob/main/geo2ml/data/tiling.py#L112){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Tiler.tile_vector\n", "\n", @@ -503,7 +507,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "78a8237ecfe2400aa44d4ad5e987a245", + "model_id": "36ab9db52432439ab7226f8c49d249f0", "version_major": 2, "version_minor": 0 }, @@ -527,7 +531,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1b8e2bbc8a1e4cb798dfdcd5ab14e783", + "model_id": "5eef8bacd5ca427ba2ce5da0b52fd34f", "version_major": 2, "version_minor": 0 }, @@ -562,7 +566,7 @@ "text/markdown": [ "---\n", "\n", - "[source](https://github.com/mayrajeo/geo2ml/blob/main/geo2ml/data/tiling.py#L171){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/mayrajeo/geo2ml/blob/main/geo2ml/data/tiling.py#L174){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Tiler.tile_and_rasterize_vector\n", "\n", @@ -577,7 +581,7 @@ "text/plain": [ "---\n", "\n", - "[source](https://github.com/mayrajeo/geo2ml/blob/main/geo2ml/data/tiling.py#L171){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/mayrajeo/geo2ml/blob/main/geo2ml/data/tiling.py#L174){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "### Tiler.tile_and_rasterize_vector\n", "\n", @@ -619,7 +623,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8d84a2c087094c8890b0c0aadc011ad2", + "model_id": "c181d67bfe8a4ff3b5c6661d42b71788", "version_major": 2, "version_minor": 0 }, @@ -644,7 +648,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "08264d32474d430790dec1232bbd4d30", + "model_id": "1620997e6e7049488536406b7181dcad", "version_major": 2, "version_minor": 0 }, @@ -718,6 +722,8 @@ " \n", " for f in rasters:\n", " src = rio.open(f)\n", + " if src.gcps[1]: in_crs = src.gcps[1]\n", + " else: in_crs = src.crs\n", " files_to_mosaic.append(src)\n", " \n", " mosaic, out_tfm = rio_merge(files_to_mosaic, method=method)\n", @@ -727,7 +733,7 @@ " 'height': mosaic.shape[1],\n", " 'width': mosaic.shape[2],\n", " 'transform': out_tfm,\n", - " 'crs': src.crs})\n", + " 'crs': in_crs})\n", " \n", " with rio.open(outfile, 'w', **out_meta) as dest: dest.write(mosaic)\n", " \n", @@ -783,7 +789,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6816c16c29cb483e9f81ab6adb84818e", + "model_id": "28ffa470c0b6451fb0fa4c232bdd2351", "version_major": 2, "version_minor": 0 }, @@ -814,7 +820,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2ade4ef218d24dfca1ad7f5d48470b2b", + "model_id": "4ce5f76ad1634f2b8d1035398ea14aee", "version_major": 2, "version_minor": 0 }, @@ -893,7 +899,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "aae41955234c4dbb9c343e832f1019be", + "model_id": "9f2f690239464a208a26423622586de6", "version_major": 2, "version_minor": 0 }, @@ -918,7 +924,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "31c511247bd647b29960408c71852eaf", + "model_id": "d309e28e01b64a6eb4f2e91bc1d5765a", "version_major": 2, "version_minor": 0 }, @@ -932,7 +938,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0901b05ad37040ef81dad72ff9089d42", + "model_id": "3d90ef21fc42458f8185f86312299656", "version_major": 2, "version_minor": 0 }, @@ -965,7 +971,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "cc0696a910f349cea9e468ab7c852bf0", + "model_id": "54a784a96e4b408e8e72760f55a1f252", "version_major": 2, "version_minor": 0 }, diff --git a/nbs/example_data/R70C21.dbf b/nbs/example_data/R70C21.dbf index d4a04a36248920a063e6b5f030287d36adbfc5f5..97c8e359c8a948b1614fb0c08e082d4926c70022 100644 GIT binary patch delta 13 UcmX>ubXubXRqIeyOCXJ3?<{+ut7v-e(WoqhKD zt>0d2X-!Q`O-;3@$|hA*rzi|`rGEActXyb_;-`8fWBk&-aRbmM9wUyRRWGZCK|ya1m* zGYy~0lvTIjvtU&QJ_~ti^LgWuXf=bc9vbHfq?Gtl$0Xxle?roz=G{YwQ8kT!HX_cr ztnnqJrueycj~&@llaw$zHOLpErhxNanq+@6KZu%qe!7ItA4IGHGbt72m7VoV96k>SIXzF#HvNoV7VZiU_-$ zwb^nSaGpDmXXF+ag`bbE&F7KT{qQP{sO~o)_p+IX1telx^JJy_D}B6w6^mFu_(I=vK>@_sb=c!`A-n9r=X zsXKh{l^UKxqgc1?Jbk>;7?E{(9r9Y6UABVp=T zrAd$+0nJEs*lgM2!;fi;rIXBmyO*jC)g(i+u^2Cz2vIN_DnW7n~VRl@Iu`eQ5# z>Pna!(r#d<2)_r)U)7|zWsi&t!3G8SF*k!=t6l1-i)+ z311E`<)UPIDT`5yE@^#)-vzCutcvhukXFW45dJ%8DPvC)z7(d+CVioLIg5ts*=!&r zOl5tcZ5CTg_-|3i&Jcbl?A<}VV0=qlFn12CWA_n$2fVF|L;sVwA^I%oBD})-@hlPk z+z$1X-Ky>15%gyXblsyyS!kTAZm40*V#mohvBXTX-af&YO3Z8Fu_yE>>zfbQQe2IP z`WN*mz?B6b4I^eM6mQl;R?l~`Xf+6XJ6Sd{Q^5HN0%HbY^(SmOtBo~B!?*7sG-k4d z=Jr={yYo#n1`;0Eqv3~l*&}p`IZ7x;3i$zICJBIAi5UR&2SM3M-SAv9?INaMI))PA zm`^%p>r;2p4^66BS|q(_G_Z0OiDW^Fu;c6unqDd5<%Q06wuqPmMbNx3yMrx=G7UY- z>s9ZITRL~%Z3{3V2E^}h*!Z&cC^0>k&KwODjhY?KXt>wXT(GH*d4OA577Tof4TXy+ zqp{`yIM^Ya?Jr-fKEzHEvmZ=(iY36B!)zZhe)=HykUo~ui)}UVtOTmQR=qZ-(+J{c3-O52av;2*1RU|YL(S$=C37{H#Tc=m?3N` z7rm@G;i+a#hpuJ~!*+$_bL>tnMh)~ZnYB|JO3YtLkhbj9u2TIyw5N7z!-*LwQ5*2G zcH@Rf#(<^MPGTlXDm}TA8rB$#3MwU7%U_{_O7W6PZ|)cIe5p$&EjxAod zhccM8hxUQb&f&eOX`xP7yNBXoM!@bpbhMfxcohYD3-u9mIHb1F6~r6{FSk%cdFZ9^ zAj(4|%6Gp;5#_-Wxnr?LX8pwu5P8BY^BS*MuPoPn+*-@YeTNzm&Kuxrr6SP41O%J$HJGY45IF=Hgmws>nhVzs!P^-;%3NY{1X zRjO-eLv~H3!Eh`1FnyCXXf*`WSp~TmW3fhh7`JvycaU`z@QY zUnpuu&iGo9GxDly(wQx-$eBqYRtS?1TeTsy(qtjh>_F0t?4drjLy(4sNYkv%kB&7- zue;_K4ahJknZod{sh5Ho4Ro4JozD_ekvO1aAK}%~`zK2+~$rK;!5Lb4bCiS<1=jk!l%o;Bkdc<`~7_CwuvXS=j(5leS8s&_V*i>Ia zMM5sH@MdBz<@weLr(IX_WFG#;Dbi?|s4Ka49tI(?)`Z)2CCBQBuJe$QRl_uDh&1ZyPNO^vUOGoYRx|d{aE~w3H%IhbbC%pbf5CVGXEx!P;50VlKSbTF;tli?96Kq!U?CMs?btY)I(PU_*(L`QeUt#+`GOB)?b zx)k9XGHh&Yc2QRC5*qOCz2{bXLTFO31+uOZ5-h@NxRcOk_zZ*)D{uG z*!t>UBvr)&&4LHBC?2Q`^lW-J)%a~Po+lv(g6B!dfxu-4E*}gXSRfsE_yRT*$Ig>_!spB2 zJbD4!3G+Xqow(6jcEN5Z=E>mLsv#H2hwQ%De6A$2kXgQcqM+_lB|g43Zt ztcN?+IvngUT}W;UkMaEINk#SXux$|?N{MLnvapaXF^dEuBf)rFkAYaOZ^JB85GwVi zj!hltn91@(g~!wlD}AFrMh(Z3n5Xwo^*UE2!deqM9^*;fVV&Nhd+qX=E347)I^5#p znRo?<*6D^?EbB{goj#hFIkGZsslyN7Rf>_DRHq{W>vSZbnGOGbTXZ&Au#T=1>7w+PNU%Tp zyQgrwM=&yQy$=vyNC!fx<1i-6c+jwDmfoS=<9nBmf?=tcL`T9m3bw zfRQ;mi$y?ogPwyqiwuU==k&0v#sB2F*(${-^L*-=>;9{|EFvcCHP@@oM17d!W^EZ; z)l@%^2`CoHjjLbLv3s~#EU-u0tz-AFP+lnGY5c~mdju{w$qHjj+Fr*{1X}bim$^Xi zKi2EKe-fxqBj1F=N_kaZhycuo`quus3kLhu>=0b-Uz>ofW& fHAmJ@*ESuSojEXeo4{x`v}_ZY&4S>w`quvflC^a~ delta 5402 zcmcgvX>?S_m42_PTGg#q>uDSs?NWnSjh5~fQmZ9EA_9b1v|yYVvtwfi2QbWd*aDIo zhyzaSSI~^E7C;h>@=bF0I?Tqb{v8@p^PWKrF$A>cgUyd4IiIAXpr|iW*gmyk3I^_0lwX3k{xND&qsInm3d2RG ze_Jr$aCf<=m(1~32DP?mW%(n^TVr#-)?$weD+{$l_9%#>gRxOiI}jWBwN85!Ky62C z6x8;|MgeVKY~UTiF)6aF=h2<+p_5nkybm$=)qrVftNcNW)aS(U6vr!@b^g z;&jTpKDqqqRduzq=2g|!xcgEf!=p)!X_!Of8q+d-Uz_;{nPo?pqFGjSb7NX4v)>E@ zKFBQNYt`T#(xX>#ht$*m(fC=S-WI*_LPKM#rX@13&2`&aYf)1SzuZ?lq^7Y;YX|$M zL0fyEe`;Ss>*}AXtvb~dbHFz4=%1>!`{i^jk(9C3`});t@m@8>#IUvBm>k~{pq+7R za=$4z{{K>A#kZCk*4=D9!>OlPX=C)CQCCWw*PFL?*c?AJ<>_->BVyA+s4vjx>N&79 zS6`qHhYPYB2I+a&`CtQfuI!j;z&3Sf^-n8<&^=C{qi2GdkJcIR2ic`Vb%DOXo!Wm~ zWk6cho%QR_c-(DS4!ng-`64MDVnrwF3A)}2os;xzy#;z?cL;9D?qG8{wgZZF)DUHh{Y`q>JiCl%07G)}8DS5Ev0cehcVINLLyRt@~s z$J@VUT8~x{|Bu~W$&^KW<}^6tW}P&_a-0tMiX2wXJo2xjEVhK|NtUCB-ZL!LiQ8L_ zCUVa>!cO6kT_d0Rh`A1~ho~3EzDEoQzDsTq^Y7t>!z|ev_a1phC+0KIb(P#B=HJ1k zUvgz<^}bI^b+$EZ{w*~898H&Xv*9|6ILx)s*r0o%n*!HP@=(MDVA0 ziAR-79f#1R@+0gcV$M_dMmxi*IBMVf5eq>qL+kG=^sc?ca$xZtjQ{h;@DRR-6%g}# zPbeLW`K>N`Y`XCVn%_mTqF_kVSA|JL*r3Z+}Q{X(km!?2f6}34z6gE{+ zH?($0PFB%=PNIyi7P(NrA32G(mx&lBVM~VL)9Fzn9+Y4q)2gWlZcnE-n>*?K7+#~n z^dq5nS&|j{1VhUVx>JJ$KTUHo7Y=W01>IeXDe~!I$)^Tl&XrW!M9hca)#H*xa|V#8 z@#X&9xqN~>Pt1DBq}#--i;;*SzDrNC4z`t;wJ`?ak?^8laDdi~m*rx*N*vq;6Q@cgPK}#A)?7i^5thU%u(q+Vo-8;|_ImZLInOLzwsN`r zaLd13c&D8{Pej~(_-+v4P|$4LC!t{qnv*?9%lZ2$(vm8CP~w5%9rPyK<`Tqe>9L8a zD`KALL1I>4CLa-@sW?c9HRBB;YeA#6<5xuHR0G_r!3uP<7dOWcc;M43xNdDc@>sk{ zoN((3DpUZrz{x6PuWj1dOBk%`B}1UMna4~!dI?5jGrZkP5cVHI^_v7?-vn3QBnbP) z`>@|2<_1+UcD_xpVystSFMEd^JzK|Q&ZZ-=j<7Y%e}r9QK43j$w=ST5qdwI7xSFoR zOd6w{#F+uxW>R!=bgbZ^#M(X!lMh!+-hwMWm`z9FijmUMXNef0yx3cdX;&<w5yN1|c$r`E%82}EQo z1kn@`%u)z`T7u`~ha`fT7G@a3Yh_p;lu%hu(v?SlC_#^QluD1%lt)rx?JT3Edh<1c zY3Qf7(R_xg582I}Lpg8<6aZgHM73QQP#On62fmnyG8p~e7=BTDKm0>wwab=Ytf7$A zN8Ir436=mwePo*aVvR}gj~-j*n_4p^;uF&$zJ=rBssW4NYvr-UMk_aom?m|(fa1$6 zGEq!bHMHXkq)ppCE;Cn5QB@RW9!U92R?$l9FQ3_D{jT!b_#8=h?9Y;{la<#}VjcfN zl2ugLMHJ+omh(%JiK22}JAkbEL0s#sNgMOF|o z!H!VRS0t97o$}jxoAm%Y#U>E(fWqzWE{Y%OQ<%{r)p~OR4eQ7a7o2aS#d@A%4R5D@ zBAgIzryHq5q^Ly7EJuuelW2yH)n^jTlH)j)Kq6UD&o((aiKggP(qt+TL%~^2i}9gG zvteO59ZQ54c9hc%sffXlK}92oMI#shUMP#kcLuN$uN<-)}?D?Gkdc71#aIh5Tx2r&vQpU&K*GASub<^%0HwCi<4~d9mJDA+{Is@6U7|G+`0QXegW$h zWlP(+tg!s18!K!bPcQR2e>@x3?&g_RRXbmfCv6e6Dp-xX`F|2oW5>yAit=hbDN)lB z>|0^8H7p)gH10gix#c>-C5`7P8c#gR*U*jP5%nU#8B*vtUqrbSaOkfvdC(&bO1t=PRc5>S}ai*t?rb7EmpwDxWSw*-bKVBg$_zQka~b4br)KF z2l!4I7xF@P*Kg@7v#gyGaR#OrTU0P&hnRt_@R2LOh1Pw*IV*_Pg#ZM YxGeK4)TM0I@6O9h{lLP1To&p73zC9mpa1{> diff --git a/nbs/example_data/tiles_partial/vectors.gpkg b/nbs/example_data/tiles_partial/vectors.gpkg index 7a581e50518c24365d964e1971668cacc4ee5a96..f31e1701886e3d317b40a0535564429e74c6bca2 100644 GIT binary patch delta 7419 zcmd^Cc~BKsy6D%(l~#z~)tVoEYxHOFD^zP~_+6t7!EcE+sr{iM zhX!KZZu>;AebU}5rr5Ug$Q-{p-c@Q%{9wgBVzx75WGekRJR{udf%w5_D^>?5JZ;O! zNK@w|#EK2-dTwz!M>_E@#oHU&9u~JVU40KNHR^?@EN(|eN`@Mo7%Q$*J&D~{b34*gUFu4-RI6KiSW*$t zf1_os`u83dPx=V8EGbr8qmD{?dNo&ihH0r%)u${jM@D*@`Vr2lRIev>2kLU9r>IfM zL&XX;JGuK}E=OvrM_rd3E0(Lfdsy75>FQ~;l&PJ`-4}B@GDfIF97Dy`>Qu+mhO`W| z5e=p4n~v_gI2|dj6!lMzSg}NX*u&y>rm4xpW5pu%h2iF$$7h|6+iXNjp?a`~CC%k- zzc_q>()@j(pIypxWv2)oX3d%H5w2P_mvF_ZIfbvynkPF|cwp61g!@*_Bm7|2+_=hj zR?Q>avubYPu2pjhomR~$+%ape99-~QUd4@XTQ#?E%c{AAn^w&!+%RiSoN?W%r3lxo znp^nh*Ut2RI-KU84^FW1tYBCCnBu35S89}WrBnGv2~m=iQstuZx$?16pv+VLOF4>J zXScFL*{BRc{U80+nUH3ica@)A%yVWt{h4>QpFI$d*C{B|1UgjK}w3a5hvt7))hS!`nZ#kdtmTlf5KF7_3W}IR3)>GS) zoq{XZ@u~-_wN!Oyr((5+S5pFP-gUg{!D=O}_FiUz4wV)7PYxon^ue-nK$0T=zBk(${=VzSh4)3jR57l(yVl3C6YO zb!d_VS+12pQU^S$3g#?*ub!USFW7Uq%CJDxQ_}G<6@{clQ5# z-VWyP*QJ7OsnIJ&e8_elS>U(8y9#XaB2|jvSiHDMY=Ec)Eb5^ii=aB$Zcnay4lf!H z98DDSf@-Y})RoyG*1(QoIDQ@6q@o&f60ukdZ(|X-#y1#3qoM-( zC*!DcYm^IS4HstzmOXABPQ>Qw$4zUEQh3#YlS|+R6-6**IIdA>jdB7`%Ii&+sy#Sg z2yP3`H+nn8G|cMJuwjfC2vf(1Vd5344-~IL8x>!}aVj3bWh(CP8jIf_Acl(XA&ZLd zAfJkRuz`xZ&_+ck9H-(AT&CjNU77gpfEX%nLlzacAfJkxuz`vj&_=~|I8MbixJ<=2 z?`Mg-oD$43v`TTSC_ygldr1t3!&43wOfdR2n(k(kyIm5D z=T6cP<7AUK@q<2V@eJsHxm6~#_i6iIWmB%SDc5gPuK%XofcJCdfKUk7A>WjA8=Reo zTeSWPF+}W>FH1Uu?GvpYBYMAFC`!6ao7q9xFDr2HP5FYP2S9!j3pa4u6tw$8(-c01 z8Me3N0odgST@Gf4(QnIDk}kpFx8(*&7vb1c?94hK7fZSTnbWXw{D8bt(wZUeKjrwc_28(vj~VeCFF{z4}0 zEauuVO&Nd2pui$dOX@L3en~-rg+|?9WYW$8uAMXIc|3yQ(m5Hm^CFBnk0gQlrgqSZ z+L>o+XT}AD4|CzY3kVx|Dc7)7?0N7gj8jA9ts8UwRZKAP4UC&rM+P)#0hl3*>ooXMIXAckF( zv^rCuK%_Tp&zTTwO_dsp2Qgx04Hu|3m~gHmf!ZNv2!hh8xk~p8LA2UhuF|MjvaErY zScI#=xO|Q`3?{}gOQel)%o3>@$GAwL3;X=awMs6~$#D!xTEPVx6VKj}v~sS^s|f@> z9-(VxT$|WK+FZ@Gc_o1<(2|JwwNe8!58~HKIGl6ICY)k|vq{p5IFdcXSrPc}kZ(!a zGh7GCFHr{(J4pw9O&vh%YZ#m8Us8tC`fyoO7aUYc)}f{>4D7)SrzxhQX$oezU6kRp z5H9P{fEi8;CNVciT5qo2EDhPK1#$Hr*GRp+AWkRs26FnEWV!x^Y>_2;w;Tk+wz1xj zx?7$E10Kmk;qV)BZ%MOp+FX4D8Llx-8ze4+)4M6T$(%bxWq555p$jl-_L3?!f6g7# z6xS+WH2ibrQ+*_DF0JT*p9&~t%%RgbCTX*Il=iN`D803u)_H+vi`Erpw#c&rv;z0Z zSVaMv#q$Q*QR0PW-pl2A@6{qR@6F@^I#c4ydzXtT@6F(O&+Oh&f_ZOo3FW=%JaX8L zk(&C`9J@M8N$Q*ejB@5;o7KF)R2OJi{=^ znd=k4t8-|~a?bT>bBXKAIM;8?M`<|bF?6iJ!28x8J;~=hzct_F`BKhv9H_wN1;q0u zT)=1#HnuD@1-zIu{?a0o@r$^C2P|a_VwSKHNn6OdZ}uEm!j?$d0uC2@5blc{?%t)W z(D3h&Gth}4kohfzc5pY)ew`fje@NOO9%Sr6EYI;Ex8B7dD|clXu6N42B`wAx&RzMo zqzyFit|ab7TXeaLbjkdl=rX{f3-%aUKbUlhGVq$DoZFu><+BGQrAW??`48#rpCwi_ zNm>LE0`(ORzx|OM4(sM{^CRCWo%l({6R?x5L(=+L4x?tFpi6Fj|A_1#`no?GEos9| zV6RuQbCQM)E6N^m3iZB?Td(DIo#<>Hy-3Z!k31b#eO>L%VN&YF^id3UPwQ>@+x6- zKZFkz9KJbvTA1&GY!1T<^3&;<^LB+HMle%LwzDEa9mR#{|RXNV+DA`4vw+#+LphA1E?DjxsOILVm>$7)Z*AM&-aN z*cMIq{7Mrw8W8mg6ZPJKtjGxHkVoK~_F2drhuC!(x50P)DdW*aJ)CRaFB>(lhjGpC zoq(sSOkk*!Y_?m{?bbtk0>f8#KLg*`s1yAeu6yTXc1_aza>=(&K%cXoC!h5`2EMFO zB6=uS`)4`GK3(BLpFb5(L=Pd2H%U4QZ5m?Gd&6(1VbJxU$3YK+muC2q)QRbsr1W0o zaEGJ^a$oUb9|#3A*_)<1^dTHhWj@M7Pc)_$u#xCNf9qMW41=RbTF-*zc@PgTuON@&fUTtVP-t069>sR^s65~2QDY$n%DnnoS0QP-?d~IgA#y?=%w3Hs z!e@g%$a*#u)3c!n$*(_WJsXON{CW%z>y=`9Hk2YC^nphHY9hZLZ9N;xi2V8h=qe}I zqpW8`1$!XrqwqJOc{(F;8G71R;x<#(&@vg6#q4mXk}3F`&^)3M_?r-qC{Ryl;TajC zHx@pt!6DYKLbh1a)9_az#UT|=)lghgU`j2uJ@{*o;^Owjg`Jml{58&h4HB`O9Q3jV z0zDk6UO{gX8_^s0dIIghUxTKj$>eC4q$hFr5*pFHlTEAv5l-dnm2aT+CUZdBx8oA? zy##0ypRsK_&Uo<`CZLH1w3vV<_yGNFBLeF9H&lwA4U0CC@8jVw8wtxen7@hIS@6Xs z!jkzIO9W_}eNz8)v+ph3@iLw9SnkxW&E(V=4iaM;2n}0M;QDBel;UoT`**e;VNC0` oZsi8wx&ybdS0z1%FYs;)E^uZWEilEvP+^A9pEqi^vn}HP0gpT0a{vGU delta 7442 zcmd^Cc~li=mai%v^*w|~;qveW0f`3LA8!R7Xh2bn#wemDZWvr~199IFe8XsTG{&mx z;o2P?(ey;)5>(XE*d2`%+hmOCB)ueRYkSgtPPd(OnvR*JV>`~h_0>~k{+e@g&h$CM zIn=k@@4Mgq{p#LdeQ#r1>c+Ozok_i--xCC3E&iwDza$9p7t~yz4`_*|*Y!>3D4 z?YO7N2{HC7g44cSurKfEk+#4dH7T5-U&$P(ctaDto??HfIN)>pJRbdi=0F>6o1_m- z8;^^!^?_NBFDfYX>XWk4#S(q}W0nH9FBH^|qNP~>S=PY#M~(~J-hgh)9wdhJob0a7 zg6@E~NUzRL7mM_kZWdpGS3iZ8Lj6WJi`N^}9XaV@f$q=g>Mh_76@>J;XbI}ux><^F z&yUd((0|j-QsDRKb-C%HUoXjh+*=?RG%Y?|?`H88`1MP;$E$yl+ZAZQ?Fs71d4oic zUY^(0ncrRH59urO(#3rJ)ozx8!hn7iEpGj8URP&+chIXpCd_!H~0g3BN}q` z*W8bv;`RCTU%1o79R1#77N0xhE!6Y!)5R=(LVnjhK0GC)Z$e9^ez=>(Ul8cHnLkx& zJ`)q=l&ZYt0pYmW^OXCALsri#9JG2K;egrmmj{J&RxcpDW%c~RS+nOuFNW3g3#YA~ zPdH`uyuwMV=Mg&0o_91lJi)to@G-0B6WXnwS9sIvd4!{8&x1RTSiOL7*y{O&H%@p8 z|Igtxe|K;kN}S+S#Aqc`nXHIPigH)^y)st0seGbbR(_-$SB5La%4($-s?V#+PGyr) zho#*yyzGPcW_vg*$|+WP%6&}n3$h^C!v!?qEB8m+!`a;Cx7))Ryz8+kvjv9RSvU~o zjKOtY7WVU=hlTT~=Pmcx!=9xdp;w75@=J)^~THFffB17{C z;jPHfBu6;QhZdQ_naGeYHq2o&mY<1GI2AGTlTSv5{NN5gv{(~PM26hg@yL)Ld@M5L zp4%fs?)lBgkUV?pgrj_HQ79aV47uyWksH=(=V!6|7?qDcGObk}z)e+g zJsb$#F-&l@IffZ=9?CwW-Y<3RU(5=lb8d(S}9gb>J#wXDiN#5g*D|Q>jFvbi-qNc8LP#)l9~vY zR*U;Z6)IC%Uua$SK2mLMxU;f zqZ5qiHu*hK(gxD$Zn*zv`J$u^fOo!@&r8~q@G)af*z=97!0UJA+meI}y%+2lD3~wqt+o-&!MoEvYr! zq@ykDVlqc{;WI>~z5p555S98oM}>*uf-kPiDd6sfAX9J1 zv5>u;#X<0fTmdPmY!I|xm*XUL60OgLJJ)5*oryems!t&Bt2Yq1I)Nw9%QrFb@BNC< zRT-;p%9QTodAggXj9<$)!B!)GC25n04}6bvaEEIqz4?i-W0Eyvq|PEfv$#1^Jjqd}x*M;6r@RgKLowZ5*`MA|KjVI6n2D z&XsgfG_-CyNQ;NE4xfGnCsE z931XYAY+Mq?Q|NmC4gwgeEJ{+{Jj3qo zqYTRSdN6dxc`)J$W|L78MU zgUjfLhd3V&xb=pNGCBx`9HN3rH)Ry9ST9eRGCKA!mCHalb(qR!0EfTi2*OvNBD6BqBnb0QtLRyd-||HN$O$))sm`gk)<4c@C#AemmwPq zEuq92qs+yKjRgh@C9yG|XU9iL3`L`s=fvcG3<1xhOu$RpTuK5YW)7T7rhsR63Aj0x z2ke52DePrQokg}zNu6m0jOqk4`?I~RHLOfh8#$&154$F5Q9Ma!dD%b=V>wT|-9Co2 zFXL&K?8h)Pi3hmLj{#~T4=^o27QxyRpgqmjo*;$Z1T8@dbwig>@l=yr7c>?!tmXb# z%X%ThT25t|@Brtc>bpeP0Jk@?3bwvVac1SCRp z1&f6%)wsH~0x_D}o=`zROK5#AjILmq&&52Szj_9<&OMUtlhlxbmjfGyUSxdwjG1hO zy!3F51GbDNum!yIXvWz3teI><w#8_U_<)j{9?=OFeZ$=Mi_*xe^d&Ym@i zO+H0(HX2$^k(`Zs2;T)GjYvMNGa~u4_q2@p^bF675eDhpNS+r1&Y;Yz6 zNR~$!pPV&ic{tAv)6{Z~WVzhH+btEsFyrEn&6tN$%vb|u@X~p?$%sy1MaafHC>@XK ztId5FTp}n{n3LKZPJYw`ltpbeCx6dGtY>8+$>Ic*MQs*;4SOfDKT6t61FsS&i&_mQ z-}3_dgQUI4GYXfWB2K0x)n*uYfj~vns(CVfG?fHrI)4L`r;*@HqhxB5w5gOwc$b(0 z=cZGble>huVFt6V3>T|W5w#a6;7&<pMQd&Y?_OBZS{~2Tuk55@S}rFG7b>u&2bQ&#!x=+6mUXPjST@huTXCdj zS{A1*MWNb`$J*91Id|sF!FV=9(lR(&oI$jMINCi5s|A~f4VJW}Jl2L9`aY~No}I~x z(eZq%glEyFTC0R-6Gf|`Wi|?^whChB(EC^kFU_I%vjW&$8ZU>Ixx~-1F8tKjMEGf} zMHb8(?%sK*nO#ngXn!qAqSnAUOPkNI!k1VTz5wN}b3SUnR&Q1K0+Y8otHKu`Z!L?6 zx5Y-yV$yzXkyYV!CR+>PK|O`Oz^d>i>|05<aMWeM2EJoDLtY!pp4n%aBz|dKCULL>_br-d}~h=pu|-O=AIn z5fU%W5ndSHIpe8zX-EZ)dMadSC9~NDDgna636jG>|GR NC*5hR-odtt{|#!sdMf|` From 5133272111fadd445a235afaf4330f90d2f050c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janne=20M=C3=A4yr=C3=A4?= Date: Wed, 28 Feb 2024 12:06:11 +0200 Subject: [PATCH 2/2] add library --- geo2ml/data/tabular.py | 16 ++++++++++------ geo2ml/data/tiling.py | 22 +++++++++++++++------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/geo2ml/data/tabular.py b/geo2ml/data/tabular.py index 472ad05..57fd85f 100644 --- a/geo2ml/data/tabular.py +++ b/geo2ml/data/tabular.py @@ -56,9 +56,11 @@ def sample_raster_with_points( gdf = gpd.read_file(sampling_locations, layer=gpkg_layer) with rio.open(input_raster) as src: - assert str(gdf.crs) == str( - src.crs - ), "Sampling locations and input raster have different crs" + if src.gcps[1]: + in_crs = src.gcps[1] + else: + in_crs = src.crs + gdf = gdf.to_crs(in_crs) coords = [(x, y) for x, y in zip(gdf.geometry.x, gdf.geometry.y)] values = np.array([p for p in src.sample(coords)]) prof = src.profile @@ -104,9 +106,11 @@ def sample_raster_with_polygons( gdf = gpd.read_file(sampling_locations, layer=gpkg_layer) with rio.open(input_raster) as src: - assert str(gdf.crs) == str( - src.crs - ), "Sampling locations and input raster have different crs" + if src.gcps[1]: + in_crs = src.gcps[1] + else: + in_crs = src.crs + gdf = gdf.to_crs(in_crs) n_bands = src.count prof = src.profile zstats = [] diff --git a/geo2ml/data/tiling.py b/geo2ml/data/tiling.py index 7e72fb5..1663116 100644 --- a/geo2ml/data/tiling.py +++ b/geo2ml/data/tiling.py @@ -86,13 +86,17 @@ def tile_raster( (dy, dy + self.gridsize_y), (dx, dx + self.gridsize_x) ) prof = src.profile.copy() - + if src.gcps[1]: + in_crs = src.gcps[1] + else: + in_crs = src.crs prof.update( height=window.height, width=window.width, transform=rio_windows.transform(window, src.transform), compress="lzw", predictor=2, + crs=in_crs, ) fname = f"R{iy}C{ix}" data = src.read(window=window) @@ -104,9 +108,8 @@ def tile_raster( dest.write(data) polys.append(box(*dest.bounds)) names.append(fname) - self.grid = gpd.GeoDataFrame( - {"cell": names, "geometry": polys}, crs=src.crs - ) + + self.grid = gpd.GeoDataFrame({"cell": names, "geometry": polys}, crs=in_crs) return def tile_vector( @@ -139,7 +142,7 @@ def tile_vector( raise Exception("Unknown output format, must be either `geojson` or `gpkg`") vector = gpd.read_file(path_to_vector, layer=gpkg_layer) - + vector = vector.to_crs(self.grid.crs) sindex = vector.sindex for row in tqdm(self.grid.itertuples()): possible_matches_index = list(sindex.intersection(row.geometry.bounds)) @@ -198,6 +201,7 @@ def tile_and_rasterize_vector( os.makedirs(self.rasterized_vector_path) vector = gpd.read_file(path_to_vector) + vector = vector.to_crs(self.grid.crs) le = LabelEncoder() vector["label"] = ( le.fit_transform(vector[column].values) + 1 @@ -205,7 +209,7 @@ def tile_and_rasterize_vector( with open(self.outpath / "label_map.txt", "w") as f: for c, i in zip(le.classes_, le.transform(le.classes_)): f.write(f"{c}: {i+1}\n") - for r in tqdm(os.listdir(self.raster_path)): + for r in tqdm([f for f in os.listdir(self.raster_path) if f.endswith("tif")]): with rio.open(self.raster_path / r) as src: out_meta = src.meta out_meta.update({"count": 1}) @@ -248,6 +252,10 @@ def untile_raster( for f in rasters: src = rio.open(f) + if src.gcps[1]: + in_crs = src.gcps[1] + else: + in_crs = src.crs files_to_mosaic.append(src) mosaic, out_tfm = rio_merge(files_to_mosaic, method=method) @@ -259,7 +267,7 @@ def untile_raster( "height": mosaic.shape[1], "width": mosaic.shape[2], "transform": out_tfm, - "crs": src.crs, + "crs": in_crs, } )