+
\ No newline at end of file
diff --git a/htmlBuilder/resources/log_temp.html b/htmlBuilder/resources/log_temp.html
new file mode 100644
index 0000000..1566be7
--- /dev/null
+++ b/htmlBuilder/resources/log_temp.html
@@ -0,0 +1,16 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/htmlBuilder/richBuilder.py b/htmlBuilder/richBuilder.py
new file mode 100644
index 0000000..9db828d
--- /dev/null
+++ b/htmlBuilder/richBuilder.py
@@ -0,0 +1,63 @@
+from os.path import dirname, realpath, join
+import config
+from utils.path import mkdir
+from copy import copy
+from bs4 import BeautifulSoup
+from utils.logging import *
+
+
+# Directory that receives the generated reports; mkdir() is called on it at
+# import time.
+_logging_path = join(config.root, "output")
+mkdir(_logging_path)
+
+# Templates live in the "resources" folder that sits next to this module.
+_dir_path_of_this_file = dirname(realpath(__file__))
+_resource_path = join(_dir_path_of_this_file, 'resources')
+
+# Per-result entry template: the first <a> element of entry_temp.html.
+# Decode explicitly as UTF-8 so parsing does not depend on the platform's
+# default encoding (the report itself is written as UTF-8).
+with open(join(_resource_path, 'entry_temp.html'), encoding='utf-8') as _f:
+    _entry_temp = BeautifulSoup(_f, features="html.parser").find('a')
+
+# Page skeleton that entries are appended to and that is finally serialized.
+with open(join(_resource_path, 'log_temp.html'), encoding='utf-8') as f:
+    html_soup = BeautifulSoup(f, features="html.parser")
+
+
+def get_logging_file_path(filename: str) -> str:
+    """Return the location of ``filename`` inside the output directory.
+
+    :param filename: bare file name of the report to be written
+    :return: ``filename`` joined onto the module's ``_logging_path``
+    """
+    return join(_logging_path, filename)
+
+
+def _write_entry(title: list, paper_link: str, id_: int):
+    """Fill a copy of the entry template and append it to the page.
+
+    :param title: values written, in order, into the ``<span>`` children of
+        the entry's ``<p>``; each one is converted with ``str``
+    :param paper_link: URL used for the entry's link and embedded sub-page
+    :param id_: number tying the entry's ``data-target`` to its ``<div>`` id
+    :raises IndexError: if the template has more ``<span>`` slots than
+        ``title`` has items
+    """
+    # work on a copy so the shared template stays untouched
+    entry = copy(_entry_temp)
+    # sign entry: the trigger's data-target must match the id of its <div>
+    entry['data-target'] = '#{}'.format(id_)
+    entry.div['id'] = '{}'.format(id_)
+    # "href" is the attribute browsers follow; the former "herf" key only
+    # produced an inert, unknown attribute
+    entry.p.a['href'] = paper_link
+    # embed title: fill the <span> placeholders in document order
+    filled = 0
+    for child in entry.p.children:
+        if child.name == "span":
+            child.string = str(title[filled])
+            filled += 1
+    # embed sub-page
+    entry.div.object['data'] = paper_link
+    entry.div.object.embed['src'] = paper_link
+    # add to soup
+    html_soup.html.body.div.append(entry)
+
+
+def save_as_html(info, topic):
+    """Render search results into the page template and save them as HTML.
+
+    The file is written to the output directory as
+    ``PaperHub_Searching_Result__<topic>.html``.
+
+    :param info: sized sequence of result dicts; the keys ``year``,
+        ``venue``, ``title`` and ``ee`` (the paper link) are read from each
+    :param topic: search topic, used in the file name, page title and heading
+    """
+    saving_path = get_logging_file_path("PaperHub_Searching_Result__{}.html".format(topic))
+    # html_soup is shared module state: remember how many children the entry
+    # container holds now so this call's entries can be removed afterwards
+    container = html_soup.html.body.div
+    baseline = len(container.contents)
+    try:
+        # body
+        for index, item in enumerate(info):
+            _write_entry([item['year'], item['venue'], item['title']], item['ee'], index)
+        # header
+        html_soup.html.head.title.string = "PaperHub: {}".format(topic)
+        html_soup.html.body.h1.string = topic
+        html_soup.html.body.h3.string = '{} results'.format(len(info))
+        with open(saving_path, 'w', encoding='utf-8') as f_:
+            f_.write(str(html_soup))
+    finally:
+        # without this, a later call would repeat these entries (and reuse
+        # their ids) in its own report
+        for stale in container.contents[baseline:]:
+            stale.extract()
+    log('{}{} results have been successfully saved to {}'.format(STD_INFO, str(len(info)), saving_path))
diff --git a/html_builder/__pycache__/__init__.cpython-37.pyc b/html_builder/__pycache__/__init__.cpython-37.pyc
deleted file mode 100644
index 23131f3..0000000
Binary files a/html_builder/__pycache__/__init__.cpython-37.pyc and /dev/null differ
diff --git a/requirements.txt b/requirements.txt
index a7b439c..fabd88d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,5 @@ scidownl~=0.2.7
termcolor~=1.1.0
colorama~=0.4.4
requests~=2.24.0
-urllib3~=1.25.11
\ No newline at end of file
+urllib3~=1.25.11
+beautifulsoup4
diff --git a/scraps/__pycache__/__init__.cpython-37.pyc b/scraps/__pycache__/__init__.cpython-37.pyc
deleted file mode 100644
index 35b181f..0000000
Binary files a/scraps/__pycache__/__init__.cpython-37.pyc and /dev/null differ
diff --git a/scraps/__pycache__/dblpSearcher.cpython-37.pyc b/scraps/__pycache__/dblpSearcher.cpython-37.pyc
deleted file mode 100644
index 15d7158..0000000
Binary files a/scraps/__pycache__/dblpSearcher.cpython-37.pyc and /dev/null differ
diff --git a/scraps/dblpSearcher.py b/scraps/dblpSearcher.py
index ed9add8..5543a66 100644
--- a/scraps/dblpSearcher.py
+++ b/scraps/dblpSearcher.py
@@ -37,6 +37,18 @@ def get_xml(terms, number, batch_size=100):
xml += [fetch(url)]
return xml
+# NOTE: disabled single-request variant of get_xml, kept for reference only.
+# It asks dblp for all `number` results in one call instead of in batches.
+# def get_xml(terms, number):
+#     """
+#     :param terms: string of searched terms
+#     :param number: number of results
+#     :return: a list of xml strings
+#     """
+#     xml = []
+#     url = 'https://dblp.org/search/publ/api?q=' + str(terms) + '&h=' + str(number)
+#     xml += [fetch(url)]
+#     return xml
+
def get_attribute(info):
"""
diff --git a/settings.json b/settings.json
index 85ac480..8f68849 100644
--- a/settings.json
+++ b/settings.json
@@ -1,6 +1,6 @@
{
"publishers": [
- "sigcomm", "mobicom", "nsdi", "sensys", "mobisys", "imwut" , "ipsn", "infocom", "mobihoc"
+ "sigcomm", "mobicom", "nsdi", "sensys", "mobisys", "imwut" , "ubicomp", "ipsn", "infocom", "mobihoc", "sysml", "hotedgevideo", "MM", "VR"
],
"number_per_search": 100,
"number_per_publisher": 100,
diff --git a/utils/__init__.py b/utils/__init__.py
index 09dfeb5..f288e13 100644
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -1,2 +1,2 @@
import colorama
-colorama.init()
\ No newline at end of file
+colorama.init()
diff --git a/utils/__pycache__/__init__.cpython-37.pyc b/utils/__pycache__/__init__.cpython-37.pyc
deleted file mode 100644
index 6c3d58e..0000000
Binary files a/utils/__pycache__/__init__.cpython-37.pyc and /dev/null differ
diff --git a/utils/__pycache__/logging.cpython-37.pyc b/utils/__pycache__/logging.cpython-37.pyc
deleted file mode 100644
index 37d9f6f..0000000
Binary files a/utils/__pycache__/logging.cpython-37.pyc and /dev/null differ
diff --git a/utils/__pycache__/path.cpython-37.pyc b/utils/__pycache__/path.cpython-37.pyc
deleted file mode 100644
index 4c2d191..0000000
Binary files a/utils/__pycache__/path.cpython-37.pyc and /dev/null differ
diff --git a/utils/parallel.py b/utils/parallel.py
new file mode 100644
index 0000000..35770af
--- /dev/null
+++ b/utils/parallel.py
@@ -0,0 +1,19 @@
+import threading
+
+
+# https://stackoverflow.com/questions/38978652/how-to-protect-an-object-using-a-lock-in-python
+class HidingLock:
+    """Pair an object with a lock and hand the object out only while locked.
+
+    Use as a context manager: ``with hiding_lock as obj: ...`` acquires the
+    lock, yields the wrapped object and releases the lock when the block ends.
+    """
+
+    def __init__(self, obj, lock=None):
+        """
+        :param obj: the object to protect
+        :param lock: lock offering ``acquire``/``release``; a new
+            ``threading.RLock`` is created when none (or a falsy one) is given
+        """
+        if not lock:
+            lock = threading.RLock()
+        self.lock = lock
+        self._obj = obj
+
+    def __enter__(self):
+        """Acquire the lock and return the protected object."""
+        self.lock.acquire()
+        return self._obj
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        """Release the lock; exceptions from the block are not suppressed."""
+        self.lock.release()
+
+    def set(self, obj):
+        """Replace the protected object while holding the lock."""
+        # Going through ``with self`` keeps acquire and release paired.
+        with self:
+            self._obj = obj
\ No newline at end of file
diff --git a/utils/path.py b/utils/path.py
index da6e3a6..f23d11e 100644
--- a/utils/path.py
+++ b/utils/path.py
@@ -1,6 +1,8 @@
import os.path as osp
import os
from pathlib import Path
+import unicodedata
+import re
def mkdir(path):
@@ -11,3 +13,20 @@ def mkdir(path):
def path_parent(path):
return Path(path).parent
+
+def slugify(value, allow_unicode=False):
+    """
+    Turn ``value`` into a lowercase, dash-separated slug.
+
+    Taken from https://github.com/django/django/blob/master/django/utils/text.py
+
+    :param value: anything convertible with ``str``
+    :param allow_unicode: keep non-ASCII word characters (NFKC-normalized)
+        when True; otherwise reduce the text to ASCII and drop the rest
+    :return: the slug; characters other than alphanumerics, underscores,
+        hyphens and whitespace are removed, runs of whitespace/dashes become
+        one dash, and leading/trailing dashes and underscores are stripped
+    """
+    text = str(value)
+    if not allow_unicode:
+        # NFKD splits accented letters into base letter + combining mark, so
+        # the ASCII round-trip keeps the base letter and drops the mark.
+        decomposed = unicodedata.normalize('NFKD', text)
+        text = decomposed.encode('ascii', 'ignore').decode('ascii')
+    else:
+        text = unicodedata.normalize('NFKC', text)
+    cleaned = re.sub(r'[^\w\s-]', '', text.lower())
+    collapsed = re.sub(r'[-\s]+', '-', cleaned)
+    return collapsed.strip('-_')
+