Merge pull request #45 from ClimateCompatibleGrowth/43-add-fastapi-endpoints

43 add fastapi endpoints
ClimateCompatibleGrowth · Nov 29, 2024 · 94e5aa9 · 94e5aa9
2 parents 00d9c4e + e0efcf1
commit 94e5aa9
Show file tree

Hide file tree

Showing 28 changed files with 1,004 additions and 490 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,21 +1,13 @@
-# Use an official Python runtime as a parent image
 FROM python:3.11.7-bookworm
 
-# Set the working directory in the container to /app
-WORKDIR /app
+WORKDIR /research-index
 
-# Add the current directory contents into the container at /app
-ADD ./app /app
-ADD requirements.txt /app/requirements.txt
+ADD requirements.txt /research-index/requirements.txt
 
-# Install packages for the memgraph client
-RUN apt update -y
-RUN apt install -y python3-dev cmake make gcc g++ libssl-dev
-
-# Install any needed packages specified in requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Make port 80 available to the world outside this container
-EXPOSE 80
+ADD . /research-index
+
+EXPOSE 8000
 
-CMD ["gunicorn", "--bind", "0.0.0.0:80", "app:app"]
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/README.md b/README.md
@@ -2,9 +2,16 @@
 
 ## Development
 
+Create a `.env` file in the project root with the following environment variables:
+```sh
+MG_HOST=          # Memgraph host address
+MG_PORT=          # Memgraph Bolt port (Memgraph listens on 7687 by default)
+MG_PORT_ALT=      # Alternative port
+```
+
 To enter development mode of the website, with the memgraph database running in the background, run
 
-    python app/app.py
+    fastapi dev main.py
 
 ## Deployment
 

diff --git a/app/app.py b/app/app.py
diff --git a/app/core/__init__.py b/app/core/__init__.py
diff --git a/app/core/config.py b/app/core/config.py
@@ -0,0 +1,19 @@
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+class Settings:
+    """Process-wide configuration read from environment variables.
+
+    The class is a singleton: every call to ``Settings()`` returns the same
+    object, and the environment is read only once, when that object is first
+    created. Values are stored exactly as ``os.getenv`` returns them, i.e.
+    as strings, or ``None`` when the variable is not set.
+
+    Attributes
+    ----------
+    MG_HOST : str or None
+        Value of the ``MG_HOST`` environment variable.
+    MG_PORT : str or None
+        Value of the ``MG_PORT`` environment variable.
+    MG_PORT_ALT : str or None
+        Value of the ``MG_PORT_ALT`` environment variable.
+    """
+
+    # The single shared instance; populated on the first ``Settings()`` call.
+    _instance = None
+
+    def __new__(cls, *args, **kwargs):
+        """Return the shared instance, creating and loading it on first use."""
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+            cls._instance._init_instance()
+        return cls._instance
+
+    def _init_instance(self):
+        """Load every supported setting from the process environment."""
+        self.MG_HOST = os.getenv("MG_HOST")
+        self.MG_PORT = os.getenv("MG_PORT")
+        # Listed alongside MG_HOST / MG_PORT in the README's `.env` template,
+        # so expose it here as well instead of leaving callers to read the
+        # environment directly.
+        self.MG_PORT_ALT = os.getenv("MG_PORT_ALT")
+
+
+# Module-level instance shared by importers of this module.
+settings = Settings()
diff --git a/app/crud/__init__.py b/app/crud/__init__.py
diff --git a/app/crud/author.py b/app/crud/author.py
@@ -0,0 +1,162 @@
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+from neo4j import Driver
+
+from app.db.session import connect_to_db
+
+
+class Author:
+    """Read-only queries for ``Author`` nodes and the outputs they wrote.
+
+    Every public method is wrapped in ``connect_to_db`` (imported from
+    ``app.db.session``), which presumably supplies the ``db`` argument --
+    TODO confirm against that module.
+    """
+
+    # Values of ``result_type`` that make ``get`` filter the outputs; any
+    # other value (including ``None``) returns outputs of every type.
+    _RESULT_TYPES = ("publication", "dataset", "software", "other")
+
+    # Tail shared by the filtered and unfiltered output queries in ``get``:
+    # collects the authors and countries of every matched output ``p``.
+    _OUTPUTS_QUERY_TAIL = """
+                CALL {
+                    WITH p
+                    MATCH (b:Author)-[r:author_of]->(p)
+                    RETURN b
+                    ORDER BY r.rank
+                }
+                OPTIONAL MATCH (p)-[:REFERS_TO]->(c:Country)
+                RETURN p as outputs,
+                       collect(DISTINCT c) as countries,
+                       collect(DISTINCT b) as authors
+                ORDER BY outputs.publication_year DESCENDING;"""
+
+    @connect_to_db
+    def get(
+        self, id: str, db: Driver, result_type: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """Retrieve one author with collaborators and outputs.
+
+        Parameters
+        ----------
+        id : str
+            UUID of the author
+        db : Driver
+            Neo4j database driver
+        result_type : str, optional
+            When one of ``publication``, ``dataset``, ``software`` or
+            ``other``, only outputs of that type are returned; any other
+            value returns all outputs.
+
+        Returns
+        -------
+        dict
+            Author information dictionary containing:
+            - uuid : str
+                Author's unique identifier
+            - orcid : str
+                Author's ORCID
+            - first_name : str
+                Author's first name
+            - last_name : str
+                Author's last name
+            - affiliations
+                Partner affiliations, as aggregated by the query
+            - workstreams
+                Associated workstreams, as aggregated by the query
+            - collaborators : list
+                Up to five co-author records, exactly as returned by the
+                driver (not converted to dictionaries)
+            - outputs : list of dict
+                One entry per output with the keys ``outputs`` (the output's
+                properties), ``countries`` and ``authors``, ordered by
+                publication year, newest first
+
+        Raises
+        ------
+        IndexError
+            If no author with the given UUID exists.
+
+        Notes
+        -----
+        When an output carries a ``cited_by_count_date``, the driver's
+        temporal value is replaced by a naive ``datetime.datetime`` built
+        from its timestamp. Outputs without that value are left unchanged.
+        """
+        author_query = """
+            MATCH (a:Author) WHERE a.uuid = $uuid
+            OPTIONAL MATCH (a)-[:member_of]->(p:Partner)
+            OPTIONAL MATCH (a)-[:member_of]->(u:Workstream)
+            RETURN a.uuid as uuid, a.orcid as orcid,
+                    a.first_name as first_name, a.last_name as last_name,
+                    collect(p.id, p.name) as affiliations,
+                    collect(u.id, u.name) as workstreams;"""
+
+        author, _, _ = db.execute_query(author_query, uuid=id)
+        if not author:
+            # Same exception type that indexing the empty result would
+            # raise, but with a message that says what was not found.
+            raise IndexError(f"No author found with uuid {id!r}")
+        results = author[0].data()
+
+        collab_query = """
+            MATCH (a:Author)-[r:author_of]->(p:Output)<-[s:author_of]-(b:Author)
+            WHERE a.uuid = $uuid AND b.uuid <> $uuid
+            RETURN DISTINCT b.uuid as uuid, b.first_name as first_name, b.last_name as last_name, b.orcid as orcid
+            LIMIT 5"""
+        colabs, _, _ = db.execute_query(collab_query, uuid=id)
+
+        results["collaborators"] = colabs
+
+        # Only the MATCH/WHERE head differs between the two variants; both
+        # heads are fixed strings and all values are bound as parameters.
+        if result_type in Author._RESULT_TYPES:
+            outputs_query = (
+                """
+                MATCH (a:Author)-[:author_of]->(p:Output)
+                WHERE (a.uuid) = $uuid AND (p.result_type = $result_type)"""
+                + Author._OUTPUTS_QUERY_TAIL
+            )
+            params = {"uuid": id, "result_type": result_type}
+        else:
+            outputs_query = (
+                """
+                MATCH (a:Author)-[:author_of]->(p:Output)
+                WHERE a.uuid = $uuid"""
+                + Author._OUTPUTS_QUERY_TAIL
+            )
+            params = {"uuid": id}
+
+        records, _, _ = db.execute_query(outputs_query, **params)
+
+        results["outputs"] = [record.data() for record in records]
+        for row in results["outputs"]:
+            output = row["outputs"]
+            cited_on = output.get("cited_by_count_date")
+            if cited_on is None:
+                # Nothing to convert for outputs without a citation date.
+                continue
+            output["cited_by_count_date"] = datetime.fromtimestamp(
+                cited_on.to_native().timestamp()
+            )
+        return results
+
+    @connect_to_db
+    def count(self, id: str, db: Driver) -> Dict[str, int]:
+        """Returns counts of articles by result type for a given author.
+
+        Parameters
+        ----------
+        id : str
+            UUID of the author
+        db : Driver
+            Neo4j database driver
+
+        Returns
+        -------
+        Dict[str, int]
+            Dictionary mapping result types to their counts; empty when the
+            query matches nothing
+        """
+        # NOTE(review): this matches the ``Article`` label while ``get``
+        # matches ``Output`` -- confirm which label the graph actually uses.
+        query = """
+                MATCH (a:Author)-[b:author_of]->(o:Article)
+                WHERE (a.uuid) = $uuid
+                RETURN o.result_type as result_type, count(o) as count
+                """
+        records, _, _ = db.execute_query(query, uuid=id)
+        return {record["result_type"]: record["count"] for record in records}
+
+    @connect_to_db
+    def get_all(self, db: Driver) -> List[Dict[str, Any]]:
+        """Retrieve every author from the database, ordered by last name.
+
+        Parameters
+        ----------
+        db : Driver
+            Neo4j database driver
+
+        Returns
+        -------
+        List[Dict[str, Any]]
+            One dictionary per author with the keys ``first_name``,
+            ``last_name``, ``uuid``, ``orcid``, ``affiliations`` and
+            ``workstreams``
+        """
+        # NOTE(review): the match on ``(o:PUBLICATION)`` binds ``o`` but the
+        # RETURN clause never uses it -- confirm whether it can be removed.
+        query = """MATCH (a:Author)
+                   OPTIONAL MATCH (a)-[:member_of]->(p:Partner)
+                   OPTIONAL MATCH (a)-[:member_of]->(u:Workstream)
+                   optional MATCH (a)-[:author_of]->(o:PUBLICATION)
+                   RETURN a.first_name as first_name, a.last_name as last_name, a.uuid as uuid, a.orcid as orcid, collect(p.id, p.name) as affiliations, collect(u.id, u.name) as workstreams
+                   ORDER BY last_name;
+                   """
+        records, _, _ = db.execute_query(query)
+
+        return [record.data() for record in records]