Skip to content

Commit

Permalink
Use typing generics (List, Dict, Set) in annotations instead of builtin generics
Browse files (browse the repository at this point in the history)
  • Loading branch information
yindaheng98 committed Feb 24, 2024
1 parent 7b833b8 commit afa212c
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 16 deletions.
8 changes: 4 additions & 4 deletions dblp_crawler/data/ccf/key.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import re
from typing import Optional
from typing import Optional, List
from .CCF_A import data as CCF_A
from .CCF_B import data as CCF_B
from .CCF_C import data as CCF_C
Expand All @@ -18,12 +18,12 @@ def key(r: str, ccf: str) -> Optional[str]:
return "/".join(search.group(1).split("/")[0:3])


CCF_A: list[str] = list(filter(None, (key(r, 'A') for r in CCF_A)))
CCF_B: list[str] = list(filter(None, (key(r, 'B') for r in CCF_B)))
CCF_A: List[str] = list(filter(None, (key(r, 'A') for r in CCF_A)))
CCF_B: List[str] = list(filter(None, (key(r, 'B') for r in CCF_B)))
CCF_B += [
"db/journals/pe",
"db/journals/tissec",
"db/journals/cogsci",
"db/conf/hotchips"
]
CCF_C: list[str] = list(filter(None, (key(r, 'C') for r in CCF_C)))
CCF_C: List[str] = list(filter(None, (key(r, 'C') for r in CCF_C)))
10 changes: 5 additions & 5 deletions dblp_crawler/graph.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import abc
import asyncio
import logging
from typing import Optional, Iterable, AsyncIterable, List
from typing import Optional, Iterable, AsyncIterable, List, Dict, Set

from dblp_crawler import download_person, DBLPPerson, Publication, download_journal_list, JournalList
from .gather import gather
Expand All @@ -10,10 +10,10 @@


class Graph(metaclass=abc.ABCMeta):
def __init__(self, pid_list: list[str], journal_list: list[str]) -> None:
self.persons: dict[str, Optional[DBLPPerson]] = {pid: None for pid in pid_list}
self.summarized_person: set[str] = set()
self.publications: set[str] = set()
def __init__(self, pid_list: List[str], journal_list: List[str]) -> None:
self.persons: Dict[str, Optional[DBLPPerson]] = {pid: None for pid in pid_list}
self.summarized_person: Set[str] = set()
self.publications: Set[str] = set()
self.init_journals = journal_list
self.journals_inited = False
self.total_author_succ_count, self.total_author_fail_count = 0, 0
Expand Down
6 changes: 3 additions & 3 deletions dblp_crawler/journal.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import asyncio
import re
from typing import AsyncIterator
from typing import AsyncIterator, List
import xml.etree.ElementTree as ElementTree
from .parser import Publication
from .downloader import download_journal
Expand All @@ -14,7 +14,7 @@ def __init__(self, data: ElementTree.Element) -> None:
assert data.tag == "bht", "Should be xml of a bht!"
self.data = data

def publications(self) -> list[Publication]:
def publications(self) -> List[Publication]:
return [Publication(r) for r in self.data.findall('./dblpcites/r')]


Expand All @@ -26,7 +26,7 @@ def __init__(self, data: ElementTree.Element) -> None:
def title(self) -> str:
return self.data.attrib['title']

def journal_keys(self) -> list[str]:
def journal_keys(self) -> List[str]:
urls = [re.sub(r"\.html$", "", li.attrib["href"]) for li in self.data.findall('./ul/li/ref')]
h1 = self.data.find('./h1').text
for proceedings in self.data.findall('./dblpcites/r/proceedings'):
Expand Down
7 changes: 4 additions & 3 deletions dblp_crawler/keyword/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
import re
from typing import Set


class Keywords:
def __init__(self) -> None:
self.rules: set[tuple[str, ...]] = set()
self.words: set[str] = set()
self.rules: Set[tuple[str, ...]] = set()
self.words: Set[str] = set()

def add_rule(self, *rule: str) -> None:
"""与关系的单词列"""
rule = tuple(frozenset(word.lower() for word in rule))
self.rules.add(rule)
self.words = self.words.union(rule)

def add_rule_list(self, *rule_list: set[str]) -> None:
def add_rule_list(self, *rule_list: Set[str]) -> None:
"""多个与关系的单词列"""
for rule in rule_list:
self.add_rule(*rule)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

setup(
name='dblp_crawler',
version='2.1.2',
version='2.1.3',
author='yindaheng98',
author_email='[email protected]',
url='https://github.com/yindaheng98/dblp-crawler',
Expand Down

0 comments on commit afa212c

Please sign in to comment.