Skip to content

Commit

Permalink
faster
Browse files: browse the repository at this point in the history
  • Loading branch information
yindaheng98 committed Dec 16, 2023
1 parent cafa0ea commit 518aa1d
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 9 deletions.
21 changes: 13 additions & 8 deletions dblp_crawler/summarizer/neo4j.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import abc
import logging
from typing import Optional
from tqdm import tqdm

from neo4j import Session

Expand All @@ -24,16 +25,20 @@ def add_publication(tx, publication, added_journals: set, selected=False):
doi=publication.doi())
if publication.journal_key() and publication.journal_key() != "db/journals/corr":
if publication.journal_key() not in added_journals:
tx.run("MERGE (p:Journal {dblp_key:$dblp_key}) SET p.dblp_name=$dblp_name, p.ccf=$ccf",
tx.run("MERGE (j:Journal {dblp_key:$dblp_key}) SET j.dblp_name=$dblp_name, j.ccf=$ccf "
"MERGE (p:Publication {title_hash:$title_hash})"
"MERGE (p)-[:PUBLISH]->(j)",
dblp_key=publication.journal_key(),
dblp_name=publication.journal(),
ccf=publication.ccf())
ccf=publication.ccf(),
title_hash=publication.title_hash())
added_journals.add(publication.journal_key())
tx.run("MERGE (p:Publication {title_hash:$title_hash})"
"MERGE (j:Journal {dblp_key:$dblp_key})"
"MERGE (p)-[:PUBLISH]->(j)",
title_hash=publication.title_hash(),
dblp_key=publication.journal_key())
else:
tx.run("MERGE (p:Publication {title_hash:$title_hash})"
"MERGE (j:Journal {dblp_key:$dblp_key})"
"MERGE (p)-[:PUBLISH]->(j)",
dblp_key=publication.journal_key(),
title_hash=publication.title_hash())


def find_orcid(person):
Expand All @@ -58,7 +63,7 @@ def add_person(tx, person: DBLPPerson, added_pubs: set, added_journals: set):
"MATCH (a:Person {dblp_pid: $pid})-[:WRITE]->(p:Publication) RETURN p.title_hash",
pid=person.pid()
).values()])
for publication in person.publications():
for publication in tqdm(list(person.publications()), desc="Writing author's papers", leave=False, position=2):
if publication.title_hash() in exist_write_papers:
continue
add_edge(tx, person.pid(), publication)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

setup(
name='dblp_crawler',
version='1.8.11',
version='1.8.12',
author='yindaheng98',
author_email='[email protected]',
url='https://github.com/yindaheng98/dblp-crawler',
Expand Down

0 comments on commit 518aa1d

Please sign in to comment.