Merge pull request #38 from wri/improve-context-layer-tool

Improve context layer tool
wri · Dec 10, 2024 · 19aadce
2 parents 99da528 + 0d16afd
commit 19aadce
Show file tree

Hide file tree

Showing 10 changed files with 196 additions and 121 deletions.
diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -15,7 +15,7 @@ services:
       - .env
 
     volumes:
-      - .:/app
+      - ./zeno:/app/zeno
 
   frontend: 
     build: 

diff --git a/tests/test_context_layer_tool.py b/tests/test_context_layer_tool.py
@@ -0,0 +1,14 @@
+from zeno.tools.contextlayer.context_layer_tool import context_layer_tool
+
+
+def test_context_layer_tool_cereal():
+    result = context_layer_tool.invoke(
+        input={"question": "Summarize disturbance alerts by type of cereal"}
+    )
+    assert result == "ESA/WorldCereal/2021/MODELS/v100"
+
+def test_context_layer_tool_null():
+    result = context_layer_tool.invoke(
+        input={"question": "Provide disturbances for Aveiro Portugal"}
+    )
+    assert result == ""
diff --git a/tests/test_dist_agent.py b/tests/test_dist_agent.py
@@ -1,6 +1,7 @@
+
 from zeno.agents.distalert.agent import graph
 from zeno.agents.maingraph.utils.state import GraphState
-from langchain_core.messages import ToolMessage, AIMessage
+
 
 def test_distalert_agent():
     initial_state = GraphState(
@@ -16,24 +17,4 @@ def test_distalert_agent():
         if not messages:
             continue
         msg = messages[0]
-        if isinstance(msg, ToolMessage):
-            yield pack({msg.name, msg.content})
-        elif isinstance(msg, AIMessage):
-            yield pack
-
-        for key, val in chunk.items():
-            print(f"Messager is {key}")
-            for key2, val2 in val.items():
-                if key2 == "messages":
-                    for msg in val.get("messages", []):
-                        print(msg.content)
-                        if hasattr(msg, "tool_calls"):
-                            print(msg.tool_calls)
-                        if hasattr(msg, "artifact"):
-                            print(str(msg.artifact)[:500])
-                else:
-                    print(key2, val2)
-                pass
-
-
-test_distalert_agent()
+        print(msg)
diff --git a/tests/test_dist_alerts.py b/tests/test_dist_alerts.py
@@ -1,13 +1,23 @@
+from zeno.tools.contextlayer.layers import layer_choices
 from zeno.tools.distalert.dist_alerts_tool import dist_alerts_tool
 
 
 def test_dist_alert_tool():
 
-    natural_lands = "WRI/SBTN/naturalLands/v1/2020"
-    features = ["23"]
+    features = ["2323"]
     result = dist_alerts_tool.invoke(
-        input={"features": features, "landcover": natural_lands, "threshold": 5}
+        input={"features": features, "landcover": layer_choices[1]["dataset"], "threshold": 8}
     )
 
     assert len(result) == 1
-    assert "PRT.6.2.5_1" in result
+    assert "AGO.1.3.4_1" in result
+
+def test_dist_alert_tool_no_landcover():
+
+    features = ["2323"]
+    result = dist_alerts_tool.invoke(
+        input={"features": features, "landcover": None, "threshold": 5}
+    )
+
+    assert len(result) == 1
+    assert "AGO.1.3.4_1" in result
diff --git a/tests/test_location_matcher.py b/tests/test_location_matcher.py
@@ -8,7 +8,7 @@ def test_location_matcher():
         "lisboa portugal": ["PRT.12.7.52_1"],
         "Liisboa portugal": ["PRT.6.2.5_1"],
         "Lisbon portugal": ["PRT.6.2.5_1"],
-        "Lamego viseu portugal": ['PRT.20.5.11_1'],
+        "Lamego viseu portugal": ["PRT.20.5.11_1"],
         "Sao Joao Porto": ["PRT.12.7.41_1"],
         "Bern Switzerland": ["PRT.6.2.5_1"],
     }

diff --git a/zeno/agents/distalert/utils/nodes.py b/zeno/agents/distalert/utils/nodes.py
@@ -3,7 +3,7 @@
 from langgraph.prebuilt import ToolNode
 
 from zeno.agents.maingraph.models import ModelFactory
-from zeno.tools.distalert.context_layer_tool import context_layer_tool
+from zeno.tools.contextlayer.context_layer_tool import context_layer_tool
 from zeno.tools.distalert.dist_alerts_tool import dist_alerts_tool
 from zeno.tools.location.location_tool import location_tool
 

diff --git a/zeno/tools/contextlayer/context_layer_tool.py b/zeno/tools/contextlayer/context_layer_tool.py
@@ -0,0 +1,49 @@
+import json
+
+from langchain_core.tools import tool
+from pydantic import BaseModel, Field
+
+from zeno.agents.maingraph.models import ModelFactory
+from zeno.tools.contextlayer.layers import DatasetNames, layer_choices
+
+
+class grade(BaseModel):
+    """Choice of landcover."""
+
+    choice: DatasetNames = Field(description="Choice of context layer to use")
+
+
+class ContextLayerInput(BaseModel):
+    """Input schema for context layer tool"""
+
+    question: str = Field(description="The question from the user")
+
+
+model = ModelFactory().get("claude-3-5-sonnet-latest").with_structured_output(grade)
+
+
+@tool("context-layer-tool", args_schema=ContextLayerInput, return_direct=False)
+def context_layer_tool(question: str) -> DatasetNames:
+    """
+    Determines whether the question asks for summarizing by land cover.
+    """
+
+    print("---CHECK CONTEXT LAYER TOOL---")
+
+    query = (
+        f"""You are a deciding if a context layer is required for analysing disturbance alerts. \n
+    Here is the user question: {question} \n
+    If the question does not ask for grouping, return empty string. \n
+    If the question asks for grouping the disturbance alerts by landcover, decide which landcover layer is most appropriate. \n
+
+    The following json data gives information about the available layers. Pick the most appropriate one and return its 'dataset' value.
+    Never change the returned 'dataset' value, always return it as is. \n
+    """
+        + json.dumps(layer_choices),
+    )
+
+    result = model.invoke(query)
+
+    print(f"---DECISION: {result.choice or 'no landcover needed'}---")
+
+    return result.choice
diff --git a/zeno/tools/contextlayer/layers.py b/zeno/tools/contextlayer/layers.py
@@ -0,0 +1,78 @@
+from typing import Literal
+
+DatasetNames = Literal[
+    "",
+    "WRI/SBTN/naturalLands/v1",
+    "ESA/WorldCover/v200",
+    "GOOGLE/DYNAMICWORLD/V1",
+    "JAXA/ALOS/PALSAR/YEARLY/FNF4",
+    "JRC/GFC2020_subtypes/V0",
+    "MODIS/061/MCD12Q1",
+    "ESA/WorldCereal/2021/MODELS/v100",
+]
+
+layer_choices = [
+    {
+        "name": "SBTN Natural Lands Map v1",
+        "dataset": "WRI/SBTN/naturalLands/v1",
+        "description": "The SBTN Natural Lands Map v1 is a 2020 baseline map of natural and non-natural land covers intended for use by companies setting science-based targets for nature, specifically the SBTN Land target #1: no conversion of natural ecosystems.  'Natural' and 'non-natural' definitions were adapted from the Accountability Framework initiative's definition of a natural ecosystem as 'one that substantially resembles - in terms of species composition, structure, and ecological function - what would be found in a given area in the absence of major human impacts' and can include managed ecosystems as well as degraded ecosystems that are expected to regenerate either naturally or through management (AFi 2024). The SBTN Natural Lands Map operationalizes this definition by using proxies based on available data that align with AFi guidance to the extent possible.  This map was made by compiling existing global and regional data.You can find the full technical note explaining the methodology linked on the Natural Lands GitHub. This work was a collaboration between Land & Carbon Lab at the World Resources Institute, World Wildlife Fund US, Systemiq, and SBTN.",
+        "resolution": 30,
+        "year": 2020,
+        "band": "classification",
+        "type": "Image",
+    },
+    {
+        "name": "ESA WorldCover",
+        "dataset": "ESA/WorldCover/v200",
+        "description": "The European Space Agency (ESA) WorldCover 10 m 2021 product provides a global land cover map for 2021 at 10 m resolution based on Sentinel-1 and Sentinel-2 data. The WorldCover product comes with 11 land cover classes and has been generated in the framework of the ESA WorldCover project, part of the 5th Earth Observation Envelope Programme (EOEP-5) of the European Space Agency.",
+        "resolution": 10,
+        "year": 2021,
+        "band": "Map",
+        "type": "ImageCollection",
+    },
+    {
+        "name": "Dynamic World V1",
+        "dataset": "GOOGLE/DYNAMICWORLD/V1",
+        "description": "  Dynamic World is a 10m near-real-time (NRT) Land Use/Land Cover (LULC) dataset that includes class probabilities and label information for nine classes.  Dynamic World predictions are available for the Sentinel-2 L1C collection from 2015-06-27 to present. The revisit frequency of Sentinel-2 is between 2-5 days depending on latitude. Dynamic World predictions are generated for Sentinel-2 L1C images with CLOUDY_PIXEL_PERCENTAGE <= 35%. Predictions are masked to remove clouds and cloud shadows using a combination of S2 Cloud Probability, Cloud Displacement Index, and Directional Distance Transform. Given Dynamic World class estimations are derived from single images using a spatial context from a small moving window, top-1 'probabilities' for predicted land covers that are in-part defined by cover over time, like crops, can be comparatively low in the absence of obvious distinguishing features. High-return surfaces in arid climates, sand, sunglint, etc may also exhibit this phenomenon.  To select only pixels that confidently belong to a Dynamic World class, it is recommended to mask Dynamic World outputs by thresholding the estimated 'probability' of the top-1 prediction. ",
+        "resolution": 10,
+        "year": 2024,
+        "band": "label",
+        "type": "ImageCollection",
+    },
+    {
+        "name": "Global 4-class PALSAR-2/PALSAR Forest/Non-Forest Map",
+        "dataset": "JAXA/ALOS/PALSAR/YEARLY/FNF4",
+        "description": "The global forest/non-forest map (FNF) is generated by classifying the SAR image (backscattering coefficient) in the global 25m resolution PALSAR-2/PALSAR SAR mosaic so that strong and low backscatter pixels are assigned as 'forest' and 'non-forest', respectively. Here, 'forest' is defined as the natural forest with the area larger than 0.5 ha and forest cover over 10%. This definition is the same as the Food and Agriculture Organization (FAO) definition. Since the radar backscatter from the forest depends on the region (climate zone), the classification of Forest/Non-Forest is conducted by using a region-dependent threshold of backscatter. The classification accuracy is checked by using in-situ photos and high-resolution optical satellite images.",
+        "resolution": 25,
+        "year": 2018,
+        "band": "fnf",
+        "type": "ImageCollection",
+    },
+    {
+        "name": "Global map of forest types 2020",
+        "dataset": "JRC/GFC2020_subtypes/V0",
+        "description": "The global map of forest types provides a spatially explicit representation of primary forest, naturally regenerating forest and planted forest (including plantation forest) for the year 2020 at 10m spatial resolution. The base layer for mapping these forest types is the extent of forest cover of version 1 of the Global Forest Cover map for year 2020 (JRC GFC 2020). The definitions of the forest types follow the definitions of the Regulation from the European Union 'on the making available on the Union market and the export from the Union of certain commodities and products associated with deforestation and forest degradation' (EUDR, Regulation (EU) 2023/1115), which are similar to characteristics and specific forest categories from the FAO Global Forest Resources Assessment. The year 2020 corresponds to the cut-off date of the EUDR.",
+        "resolution": 10,
+        "year": 2020,
+        "band": "GFT",
+        "type": "ImageCollection",
+    },
+    {
+        "name": "MCD12Q1.061 MODIS Land Cover Type Yearly Global 500m",
+        "dataset": "MODIS/061/MCD12Q1",
+        "description": "The Terra and Aqua combined Moderate Resolution Imaging Spectroradiometer (MODIS) Land Cover Type (MCD12Q1) Version 6.1 data product provides global land cover types at yearly intervals. The MCD12Q1 Version 6.1 data product is derived using supervised classifications of MODIS Terra and Aqua reflectance data. Land cover types are derived from the International Geosphere-Biosphere Programme (IGBP), University of Maryland (UMD), Leaf Area Index (LAI), BIOME-Biogeochemical Cycles (BGC), and Plant Functional Types (PFT) classification schemes. The supervised classifications then underwent additional post-processing that incorporate prior knowledge and ancillary information to further refine specific classes. Additional land cover property assessment layers are provided by the Food and Agriculture Organization (FAO) Land Cover Classification System (LCCS) for land cover, land use, and surface hydrology.",
+        "resolution": 500,
+        "year": 2023,
+        "band": "LC_Type1",
+        "type": "ImageCollection",
+    },
+    {
+        "name": "ESA WorldCereal 10 m v100",
+        "dataset": "ESA/WorldCereal/2021/MODELS/v100",
+        "description": "The European Space Agency (ESA) WorldCereal 10 m 2021 product suite consists of global-scale annual and seasonal crop maps and their related confidence. They were generated as part of the ESA-WorldCereal project. More information on the content of these products and the methodology used to generate them is described in [1].  This collection contains up to 106 agro-ecological zone (AEZ) images for each product which were all processed with respect to their own regional seasonality and should be considered as independent products. These seasons are described in the list below and were developed in [2] as part of the project. Note that cereals as described by WorldCereal include wheat, barley, and rye, which belong to the Triticeae tribe.  WorldCereal seasons description:      tc-annual: a one-year cycle being defined in an AEZ by the end of the last considered growing season     tc-wintercereals: the main cereals season defined in an AEZ     tc-springcereals: optional springcereals season, only defined in certain AEZ     tc-maize-main: the main maize season defined in an AEZ     tc-maize-second: optional second maize season, only defined in certain AEZ ",
+        "resolution": 10,
+        "year": 2021,
+        "band": "classification",
+        "type": "ImageCollection",
+    },
+]
diff --git a/zeno/tools/distalert/context_layer_tool.py b/zeno/tools/distalert/context_layer_tool.py
-Original file line number
+Diff line change
@@ Expand Up / @@ -15,7 +15,7 @@ services: @@
           - .env
         volumes:
-          - .:/app
+          - ./zeno:/app/zeno
       frontend:
         build:
@@ Expand Down @@