Merge pull request #50 from ClimateCompatibleGrowth/schema

Update schema for API routes
ClimateCompatibleGrowth · Dec 10, 2024 · bcce7fa · bcce7fa
2 parents ec5937b + 17f8d71
commit bcce7fa
Show file tree

Hide file tree

Showing 14 changed files with 434 additions and 220 deletions.
diff --git a/app/crud/author.py b/app/crud/author.py
@@ -57,8 +57,8 @@ def get(
             OPTIONAL MATCH (a)-[:member_of]->(u:Workstream)
             RETURN a.uuid as uuid, a.orcid as orcid,
                     a.first_name as first_name, a.last_name as last_name,
-                    collect(p.id, p.name) as affiliations,
-                    collect(u.id, u.name) as workstreams;"""
+                    collect(p) as affiliations,
+                    collect(u) as workstreams;"""
 
         author, _, _ = db.execute_query(author_query, uuid=id)
         results = author[0].data()
@@ -70,7 +70,7 @@ def get(
             LIMIT 5"""
         colabs, summary, keys = db.execute_query(collab_query, uuid=id)
 
-        results["collaborators"] = colabs
+        results["collaborators"] = [x.data() for x in colabs]
 
         if result_type and result_type in [
             "publication",
@@ -88,10 +88,10 @@ def get(
                     ORDER BY r.rank
                 }
                 OPTIONAL MATCH (p)-[:REFERS_TO]->(c:Country)
-                RETURN p as outputs,
+                RETURN p as results,
                        collect(DISTINCT c) as countries,
                        collect(DISTINCT b) as authors
-                ORDER BY outputs.publication_year DESCENDING;"""
+                ORDER BY results.publication_year DESCENDING;"""
 
             result, _, _ = db.execute_query(
                 publications_query, uuid=id, result_type=result_type
@@ -108,19 +108,22 @@ def get(
                     ORDER BY r.rank
                 }
                 OPTIONAL MATCH (p)-[:REFERS_TO]->(c:Country)
-                RETURN p as outputs,
+                RETURN p as results,
                     collect(DISTINCT c) as countries,
                     collect(DISTINCT b) as authors
-                ORDER BY outputs.publication_year DESCENDING;"""
-
-            result, summary, keys = db.execute_query(publications_query, uuid=id)
-
-        results["outputs"] = [x.data() for x in result]
-        for result in results["outputs"]:
-            neo4j_datetime = result["outputs"]["cited_by_count_date"]
-            result["outputs"]["cited_by_count_date"] = datetime.fromtimestamp(
-                neo4j_datetime.to_native().timestamp()
-            )
+                ORDER BY results.publication_year DESCENDING;"""
+
+            records, _, _ = db.execute_query(publications_query, uuid=id)
+            outputs = []
+            for x in records:
+                data = x.data()
+                package = data['results']
+                package['authors'] = data['authors']
+                package['countries'] = data['countries']
+                outputs.append(package)
+
+        results['outputs'] = {}
+        results['outputs']['results'] = outputs
         return results
 
     @connect_to_db
@@ -145,18 +148,28 @@ def count(self, id: str, db: Driver) -> Dict[str, int]:
                 RETURN o.result_type as result_type, count(o) as count
                 """
         records, summary, keys = db.execute_query(query, uuid=id)
-        return {x.data()["result_type"]: x.data()["count"] for x in records}
-
+        if len(records) > 0:
+            counts = {x.data()["result_type"]: x.data()["count"] for x in records}
+            counts['total'] = sum(counts.values())
+            return counts
+        else:
+            return {'total': 0,
+                    'publications': 0,
+                    'datasets': 0,
+                    'other': 0,
+                    'software': 0}
+
     @connect_to_db
-    def get_all(self, db: Driver) -> List[Dict[str, Any]]:
+    def get_all(self, db: Driver, skip: int, limit: int) -> List[Dict[str, Any]]:
         """Retrieve list of authors from the database."""
         query = """MATCH (a:Author)
                    OPTIONAL MATCH (a)-[:member_of]->(p:Partner)
                    OPTIONAL MATCH (a)-[:member_of]->(u:Workstream)
-                   optional MATCH (a)-[:author_of]->(o:PUBLICATION)
-                   RETURN a.first_name as first_name, a.last_name as last_name, a.uuid as uuid, a.orcid as orcid, collect(p.id, p.name) as affiliations, collect(u.id, u.name) as workstreams
-                   ORDER BY last_name;
+                   RETURN a.first_name as first_name, a.last_name as last_name, a.uuid as uuid, a.orcid as orcid, collect(p) as affiliations, collect(u) as workstreams
+                   ORDER BY last_name
+                   SKIP $skip
+                   LIMIT $limit;
                    """
-        records, summary, keys = db.execute_query(query)
+        records, _, _ = db.execute_query(query, skip=skip, limit=limit)
 
         return [record.data() for record in records]
diff --git a/app/crud/output.py b/app/crud/output.py
@@ -35,47 +35,10 @@ def get(self, id: str, db: Driver) -> Dict[str, Any]:
                 - orcid : str
                     Author's ORCID identifier
         """
-        query = """MATCH (p:Article)
-                   WHERE p.uuid = $uuid
-                   OPTIONAL MATCH (p)-[:REFERS_TO]->(c:Country)
-                   RETURN DISTINCT p as output, collect(DISTINCT c) as countries;"""
-        records, summary, keys = db.execute_query(query, uuid=id)
-        print(records[0].data())
-        results = {}
-        results = records[0].data()["output"]
-        results["countries"] = records[0].data()["countries"]
 
-        authors_query = """MATCH (a:Author)-[r:author_of]->(p:Article)
-                            WHERE p.uuid = $uuid
-                            RETURN a.uuid as uuid, a.first_name as first_name, a.last_name as last_name, a.orcid as orcid;"""
-
-        records, summary, keys = db.execute_query(authors_query, uuid=id)
-
-        results["authors"] = [x.data() for x in records]
-
-        return results
-    @connect_to_db
-    def get_all(self, db: Driver) -> List[Dict[str, Any]]:
-        """Retrieve all article outputs with their associated countries and authors.
-
-        Parameters
-        ----------
-        db : Driver
-            Neo4j database driver
-
-        Returns
-        -------
-        List[Dict[str, Any]]
-            List of dictionaries containing:
-            - outputs : Dict
-                Article properties
-            - countries : List[Dict]
-                List of referenced countries
-            - authors : List[Dict]
-                List of authors ordered by rank
-        """
         query = """
                 MATCH (o:Article)
+                WHERE o.uuid = $uuid
                 OPTIONAL MATCH (o)-[:REFERS_TO]->(c:Country)
                 CALL
                 {
@@ -84,10 +47,16 @@ def get_all(self, db: Driver) -> List[Dict[str, Any]]:
                 RETURN a
                 ORDER BY b.rank
                 }
-                RETURN o as outputs, collect(DISTINCT c) as countries, collect(DISTINCT a) as authors;
-        """
-        records, summary, keys = db.execute_query(query)
-        return [x.data() for x in records]
+                RETURN o as outputs, collect(DISTINCT c) as countries, collect(DISTINCT a) as authors
+                """
+        records, summary, keys = db.execute_query(query,
+                                                        uuid=id)
+        data = [x.data() for x in records][0]
+        package = data['outputs']
+        package['authors'] = data['authors']
+        package['countries'] = data['countries']
+
+        return package
 
     @connect_to_db
     def count(self, db: Driver) -> Dict[str, int]:
@@ -109,10 +78,19 @@ def count(self, db: Driver) -> Dict[str, int]:
                 RETURN o.result_type as result_type, count(o) as count
                 """
         records, summary, keys = db.execute_query(query)
-        return {x.data()["result_type"]: x.data()["count"] for x in records}
+        if len(records) > 0:
+            counts = {x.data()["result_type"]: x.data()["count"] for x in records}
+            counts['total'] = sum(counts.values())
+            return counts
+        else:
+            return {'total': 0,
+                    'publications': 0,
+                    'datasets': 0,
+                    'other': 0,
+                    'software': 0}
 
     @connect_to_db
-    def filter_type(self, db: Driver, result_type: str) -> List[Dict[str, Any]]:
+    def filter_type(self, db: Driver, result_type: str, skip: int, limit: int) -> List[Dict[str, Any]]:
         """Filter articles by result type and return with ordered authors.
 
         Parameters
@@ -149,9 +127,93 @@ def filter_type(self, db: Driver, result_type: str) -> List[Dict[str, Any]]:
                 RETURN a
                 ORDER BY b.rank
                 }
+
+                RETURN o as outputs,
+                       collect(DISTINCT c) as countries,
+                       collect(DISTINCT a) as authors
+                SKIP $skip
+                LIMIT $limit;
+        """
+        records, _, _ = db.execute_query(query,
+                                         result_type=result_type,
+                                         skip=skip,
+                                         limit=limit)
+        outputs = []
+        for x in records:
+            data = x.data()
+            package = data['outputs']
+            package['authors'] = data['authors']
+            package['countries'] = data['countries']
+            outputs.append(package)
+
+        return outputs
+
+    @connect_to_db
+    def filter_country(self,
+                       db: Driver,
+                       result_type: str,
+                       skip: int,
+                       limit: int,
+                       country: str) -> List[Dict[str, Any]]:
+        """Filter articles by country and result type and return with ordered authors.
+
+        Parameters
+        ----------
+        db : Driver
+            Neo4j database driver
+        result_type : str
+            Type of result to filter by (e.g. 'journal_article')
+        skip: int
+            Number of rows in the output to skip
+        limit: int
+            Number of rows to return
+        country: str
+            Three letter ISO country code
+
+        Returns
+        -------
+        List[Dict[str, Any]]
+            Filtered list of articles; each item is the article's
+            property dict (returned by the query as ``outputs``)
+            with these keys added:
+            - countries : List[Dict]
+                List of referenced countries
+            - authors : List[Dict]
+                List of authors ordered by rank
+
+        Raises
+        ------
+        ValueError
+            On any connection or query error (re-raised by connect_to_db)
+        """
+        query = """
+                MATCH (o:Article)-[:REFERS_TO]->(c:Country)
+                WHERE o.result_type = $result_type
+                AND c.id = $country_id
+                CALL
+                {
+                WITH o
+                MATCH (a:Author)-[b:author_of]->(o)
+                RETURN a
+                ORDER BY b.rank
+                }
                 RETURN o as outputs,
                        collect(DISTINCT c) as countries,
-                       collect(DISTINCT a) as authors;
+                       collect(DISTINCT a) as authors
+                SKIP $skip
+                LIMIT $limit;
         """
-        records, summary, keys = db.execute_query(query, result_type=result_type)
-        return [x.data() for x in records]
+        records, _, _ = db.execute_query(query,
+                                         result_type=result_type,
+                                         country_id=country,
+                                         skip=skip,
+                                         limit=limit)
+        outputs = []
+        for x in records:
+            data = x.data()
+            package = data['outputs']
+            package['authors'] = data['authors']
+            package['countries'] = data['countries']
+            outputs.append(package)
+
+        return outputs
diff --git a/app/crud/workstream.py b/app/crud/workstream.py
@@ -13,7 +13,7 @@ def get_all(self, db: Driver) -> Dict[str, Any]:
                 RETURN p.id as id, p.name as name, collect(a) as members"""
         records, summary, keys = db.execute_query(query)
         return [x.data() for x in records]
-    
+
     @connect_to_db
     def get(self, id: str, db: Driver) -> Dict[str, Any]:
         query = """MATCH (p:Workstream)

diff --git a/app/db/session.py b/app/db/session.py
@@ -1,11 +1,13 @@
 from functools import wraps
 from  app.core.config import settings
 
+
 from neo4j import GraphDatabase
 
 MG_HOST = settings.MG_HOST
 MG_PORT = settings.MG_PORT
 
+
 def connect_to_db(f):
     @wraps(f)
     def with_connection_(*args, **kwargs):
@@ -15,11 +17,10 @@ def with_connection_(*args, **kwargs):
             AUTH = ("", "")
             with GraphDatabase.driver(URI, auth=AUTH) as db:
                 db.verify_connectivity()
-                result = f(*args, db, **kwargs)
+                return f(*args, db, **kwargs)
         except Exception as e:
             raise ValueError(e)
         finally:
             db.close()
-        return result
 
     return with_connection_
diff --git a/app/ingest.py b/app/ingest.py