-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Create ci.yml * Update __main__.py * Update conf.py * Update __main__.py * Update cli.py * Update test_execution.py * Update test_wrapper.py * Fix convention for .github/workflows/python-package.yml.py * Fix convention for docs.source.conf.py * Fix convention for .github/workflows/python-package.yml.py * Fix convention for github/workflows/python-package.yml.py * Fix convention for .github/workflows/python-package.yml.py * Fix convention for .github/workflows/python-package.yml.py * Fix convention for src/melt/tools/data/dataset.py * Fix convention for src/melt/tools/data/loader.py * Fix convention for src/melt/tools/data/__init__.py * Fix convention for src/melt/tools/data/parser.py * Delete .github/workflows/.github/workflows/ci.yml * Fix convention for src/melt/tools/data/dataset.py.py * Fix convention for src/melt/tools/metrics/data_stats_metric/__init__.py * Fix convention for src/melt/tools/metrics/data_stats_metric/data_stats_metric.py * Fix convention for src/melt/tools/metrics/summac/utils_misc.py * Fix convention for src/melt/tools/metrics/base.py * Fix convention for src/melt/tools/metrics/basic_metrics.py * Fix convention for src/melt/tools/metrics/bias.py * Fix convention for src/melt/tools/metrics/calibration_metric.py * Fix convention for src/melt/tools/metrics/ir.py * Fix convention for src/melt/tools/metrics/language.py * Fix convention for src/melt/tools/metrics/name_detector.py * Fix convention for src/melt/tools/metrics/name_detector.py * Fix convention for src/melt/tools/metrics/name_detector.py * Fix convention for src/melt/tools/metrics/post_process.py * Fix convention for src/melt/tools/metrics/question_answering.py * Fix convention for src/melt/tools/metrics/reasoning.py * Fix convention for docs/source/conf.py * Fix convention for src/melt/tools/metrics/summac/model_summac.py * Fix convention for src/melt/tools/metrics/question_answering.py --------- Co-authored-by: Duc Quang Nguyen <[email protected]>
- Loading branch information
1 parent
cdb6ae0
commit a7d6332
Showing
24 changed files
with
1,838 additions
and
1,332 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,71 +1,52 @@ | ||
"""
Configuration file for the Sphinx documentation builder.

This file contains a selection of the most common options.
For a full list, see the documentation:
https://www.sphinx-doc.org/en/master/usage/configuration.html
"""
# Sphinx looks settings up by their exact, documented lowercase names
# (``project``, ``extensions``, ``html_theme``, ...). Renaming them to
# UPPER_CASE to satisfy a constant-naming lint rule makes Sphinx ignore every
# value in this file, so the lint rule is disabled here instead.
# pylint: disable=invalid-name

import os
import sys
from datetime import datetime

# -- Path setup --------------------------------------------------------------
# Put the package sources on sys.path so autodoc can import the modules it
# documents. The path is relative to the directory the build is run from.
sys.path.insert(0, os.path.abspath("../../src"))

# -- Project information -----------------------------------------------------
project = "MELTs"
author = "Thu Nguyen Hoang Anh"
# ``copyright`` shadows the builtin on purpose: it is the setting name Sphinx
# reads for the copyright notice.
copyright = f"{datetime.now().year}, {author}"  # pylint: disable=redefined-builtin

# The full version, including alpha/beta/rc tags
release = "0.1"

# -- General configuration ---------------------------------------------------
master_doc = "index"

# Sphinx extension modules as strings, can be built-in or custom
extensions = [
    "sphinx.ext.duration",
    "sphinx.ext.autodoc",
    "sphinx.ext.coverage",
    "sphinx_rtd_theme",
    "sphinx.ext.doctest",
]

# List of modules to mock during autodoc generation
autodoc_mock_imports = ["pyemd"]

# Paths that contain templates, relative to this directory
templates_path = ["_templates"]

# List of patterns to ignore when looking for source files
exclude_patterns = []

# Sort members alphabetically in the autodoc output
autodoc_member_order = "alphabetical"

# -- Options for HTML output -------------------------------------------------
html_theme = "sphinx_rtd_theme"

# Paths for custom static files (like style sheets), relative to this directory
html_static_path = ["_static"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,83 @@ | ||
import logging | ||
import spacy | ||
import nltk | ||
from .cli import main | ||
from spacy.cli import download as spacy_download | ||
from typing import NoReturn | ||
|
||
nltk.download('punkt_tab') | ||
try: | ||
spacy.load("en_core_web_sm") | ||
except OSError: | ||
print( | ||
"Downloading the spacy en_core_web_sm model\n" | ||
"(don't worry, this will only happen once)" | ||
) | ||
from spacy.cli import download | ||
# Root logging setup: INFO and above, each record rendered as
# "<timestamp> - <level> - <message>". The module logger is named "nlp_utils".
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("nlp_utils")
|
||
download("en_core_web_sm") | ||
def download_nltk_resources() -> None:
    """Download the NLTK 'punkt' resource and log the outcome.

    ``nltk.download`` reports the result through its boolean return value; it
    is not a context manager, so it is called directly and the flag inspected.

    Raises:
        Exception: Whatever ``nltk.download`` raises is logged and re-raised
            unchanged.
    """
    try:
        downloaded = nltk.download('punkt')
    except Exception as error:
        logger.error("Failed to download NLTK resources: %s", error)
        raise

    if downloaded:
        logger.info("Successfully downloaded NLTK 'punkt' resource.")
    else:
        # A False result is logged but not raised, on the assumption that the
        # resource may already be usable from a local copy.
        # NOTE(review): confirm whether a failed download should abort start-up.
        logger.error(
            "Failed to download NLTK resources: %s",
            "nltk.download('punkt') returned False",
        )
|
||
|
||
def load_spacy_model(model_name: str = "en_core_web_sm") -> spacy.language.Language:
    """Load and return a spaCy model, downloading it first if it is missing.

    Args:
        model_name: Name of the spaCy model to load.

    Returns:
        The loaded spaCy model.

    Raises:
        Exception: Any error from loading, downloading or re-loading the model
            is logged and re-raised unchanged.
    """
    try:
        model = spacy.load(model_name)
    except OSError:
        logger.warning("spaCy model '%s' not found. Downloading...", model_name)
        # A sibling ``except`` clause does not cover errors raised inside this
        # handler, so the download/reload step needs its own guard to be logged.
        # NOTE(review): a downloader that exits the process on failure raises
        # SystemExit, which ``except Exception`` deliberately does not intercept.
        try:
            spacy_download(model_name)
            model = spacy.load(model_name)
        except Exception as error:
            logger.error("Failed to load spaCy model: %s", error)
            raise
        logger.info("Successfully downloaded and loaded spaCy model: %s", model_name)
    except Exception as error:
        logger.error("Failed to load spaCy model: %s", error)
        raise
    else:
        logger.info("Successfully loaded spaCy model: %s", model_name)
    return model
|
||
|
||
def execute_cli_main() -> None:
    """Import the CLI entry point and run it.

    The package-relative import is tried first so that running the package
    (``python -m <package>``) resolves the sibling ``cli`` module. When there is
    no parent package (the file is run directly as a script), the relative
    import raises ImportError and a top-level ``cli`` import is used instead.

    Raises:
        ImportError: If ``cli`` cannot be imported by either route.
    """
    try:
        from .cli import main as cli_main
        logger.info("Successfully imported 'main' from 'cli' module.")
    except ImportError as import_error:
        # Expected when executed as a plain script; fall back to a top-level import.
        logger.warning("ImportError: %s", import_error)
        try:
            from cli import main as cli_main
            logger.info("Successfully imported 'cli' module directly.")
        except ImportError as inner_import_error:
            logger.critical("Failed to import 'cli' module: %s", inner_import_error)
            raise
    cli_main()
|
||
|
||
def main() -> None:
    """Prepare NLP resources, then hand control to the CLI.

    Runs, in order: the NLTK resource download, the spaCy model load, and the
    CLI entry point.
    """
    download_nltk_resources()  # step 1: NLTK data
    load_spacy_model()  # step 2: spaCy model (return value unused here)
    execute_cli_main()  # step 3: run the command-line interface
|
||
|
||
# Entry point: run the start-up sequence only when executed, not when imported.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,43 @@ | ||
import spacy | ||
|
||
try: | ||
spacy.load("en_core_web_sm") | ||
except OSError: | ||
print( | ||
"Downloading the spacy en_core_web_sm model\n" | ||
"(don't worry, this will only happen once)" | ||
) | ||
from spacy.cli import download | ||
|
||
download("en_core_web_sm") | ||
|
||
from .script_arguments import ScriptArguments | ||
from .generation import generation | ||
|
||
# from .to_sheet import to_sheet | ||
# from .to_sheet_std import to_sheet_std | ||
from spacy.cli import download | ||
from transformers import HfArgumentParser | ||
from dotenv import load_dotenv | ||
|
||
from script_arguments import ScriptArguments # Ensure this module is in the correct path | ||
from generation import generation # Ensure this module is in the correct path | ||
|
||
def ensure_spacy_model(model_name="en_core_web_sm"):
    """
    Make sure the named spaCy model can be loaded, downloading it when the
    load attempt fails with OSError. Progress is reported on stdout.
    """
    label = f"spaCy model '{model_name}'"
    try:
        spacy.load(model_name)
        print(f"{label} is already installed.")
    except OSError:
        print(f"{label} not found. Downloading...")
        download(model_name)
        print(f"{label} has been downloaded and installed.")
|
||
def main():
    """
    Command-line entry point.

    Steps, in order:
    1. Load environment variables from a .env file.
    2. Make sure the spaCy model is available.
    3. Parse command-line arguments into a ScriptArguments instance.
    4. Run generation with the parsed arguments.
    """
    load_dotenv()
    ensure_spacy_model()

    # The parser returns one dataclass instance per registered type; only
    # ScriptArguments is registered, so the first element is the one needed.
    parsed = HfArgumentParser(ScriptArguments).parse_args_into_dataclasses()
    generation(parsed[0])
|
||
# Entry point: parse arguments and run generation only when executed directly.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
"""Package initializer that exposes DatasetWrapper from the dataset module.""" | ||
from .dataset import DatasetWrapper | ||
|
||
__all__ = [ | ||
|
Oops, something went wrong.