Skip to content

Commit

Permalink
no add exists
Browse files Browse the repository at this point in the history
  • Loading branch information
yindaheng98 committed Dec 16, 2023
1 parent dc19a36 commit 960e62e
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 10 deletions.
25 changes: 16 additions & 9 deletions dblp_crawler/summarizer/neo4j.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,26 @@ def add_person(tx, person: DBLPPerson, added_pubs: set, added_journals: set):
for publication in tqdm(list(person.publications()), desc="Writing author's papers", leave=False, position=2):
if publication.title_hash() in exist_write_papers:
continue
add_edge(tx, person.pid(), publication)
add_edges(tx, [person.pid()], publication)
if publication.key() not in added_pubs:
add_publication(tx, publication, added_journals)
added_pubs.add(publication.key())


def add_edge(tx, author_id, publication: Publication):
    """Ensure a single WRITE relationship from an author to a publication.

    The Person node (keyed by ``dblp_pid``), the Publication node (keyed by
    ``title_hash``) and the relationship between them are all MERGEd by one
    query.

    Args:
        tx: Transaction-like object exposing ``run(query, **params)``.
        author_id: DBLP person id of the author.
        publication: Publication whose ``title_hash()`` identifies the node.
    """
    merge_query = (
        "MERGE (a:Person {dblp_pid: $a}) "
        "MERGE (p:Publication {title_hash: $title_hash}) "
        "MERGE (a)-[:WRITE]->(p)"
    )
    tx.run(merge_query, a=author_id, title_hash=publication.title_hash())
def add_edges(tx, authors_id, publication: Publication):
    """Link each author to a publication with a WRITE relationship.

    Authors that already have a WRITE edge to the publication are fetched
    with one MATCH query up front and skipped, so only missing edges cost a
    MERGE round trip.

    Args:
        tx: Transaction-like object exposing ``run(query, **params)`` whose
            result provides ``values()``.
        authors_id: Iterable of DBLP person ids to link to the publication.
        publication: Publication whose ``title_hash()`` identifies the node.
    """
    # The hash does not change between authors; compute it once.
    title_hash = publication.title_hash()

    # Each returned row holds exactly one column (a.dblp_pid).
    linked_pids = {
        pid for (pid,) in
        tx.run(
            "MATCH (a:Person)-[:WRITE]->(p:Publication {title_hash: $title_hash}) RETURN a.dblp_pid",
            title_hash=title_hash,
        ).values()
    }

    for author_id in authors_id:
        if author_id in linked_pids:
            continue
        tx.run(
            "MERGE (a:Person {dblp_pid: $a}) "
            "MERGE (p:Publication {title_hash: $title_hash}) "
            "MERGE (a)-[:WRITE]->(p)",
            a=author_id,
            title_hash=title_hash,
        )
        # Remember the new edge so a repeated id in authors_id is not re-sent.
        linked_pids.add(author_id)


class Neo4jGraph(Graph, metaclass=abc.ABCMeta):
Expand All @@ -95,6 +103,5 @@ def summarize_person(self, a: str, person: Optional[DBLPPerson]): # 构建summa

# Write one publication and its author edges through self.session, then
# record the publication's key in self.added_pubs.
def summarize_publication(self, authors_id, publication: Publication): # build the summary
self.session.execute_write(add_publication, publication, self.added_journals, self.select) # add the publication's details to the graph
for a in authors_id:
self.session.execute_write(add_edge, a, publication) # add the edge to the graph
self.session.execute_write(add_edges, authors_id, publication) # add the edges to the graph
self.added_pubs.add(publication.key())
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

setup(
name='dblp_crawler',
version='1.8.16',
version='1.9',
author='yindaheng98',
author_email='[email protected]',
url='https://github.com/yindaheng98/dblp-crawler',
Expand Down

0 comments on commit 960e62e

Please sign in to comment.