Skip to content

Commit

Permalink
Merge pull request #7 from MrSampson/main
Browse files: browse the repository at this point in the history
Add support for other languages
  • Loading branch information
kevin91nl authored May 11, 2024
2 parents 9ed32f2 + 800a825 commit d8de40c
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 4 deletions.
18 changes: 18 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]
argparse = "==1.4.0"
sparqlwrapper = "==1.8.5"
pandas = "==1.2.4"
rdflib = "==6.0.2"
requests = "==2.25.1"
bs4 = "==0.0.1"
beautifulsoup4 = "==4.9.3"

[dev-packages]

[requires]
python_version = "3.12"
13 changes: 9 additions & 4 deletions eurlex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,13 +321,15 @@ def simplify_iri(iri: str) -> str:
return iri


def get_html_by_cellar_id(cellar_id: str) -> str:
def get_html_by_cellar_id(cellar_id: str, language: str = "en") -> str:
"""Retrieve HTML by CELLAR ID.
Parameters
----------
cellar_id : str
The CELLAR ID to find HTML for.
language : str
The language to retrieve the HTML in (default: "en").
Returns
-------
Expand All @@ -342,20 +344,23 @@ def get_html_by_cellar_id(cellar_id: str) -> str:
allow_redirects=True,
headers={ # pragma: no cover
"Accept": "text/html,application/xhtml+xml,application/xml", # pragma: no cover
"Accept-Language": "en", # pragma: no cover
"Accept-Language": f"{language}", # pragma: no cover
},
) # pragma: no cover
html = response.content.decode("utf-8") # pragma: no cover
return html # pragma: no cover


def get_html_by_celex_id(celex_id: str) -> str:
def get_html_by_celex_id(celex_id: str, language: str = "en") -> str:
"""Retrieve HTML by CELEX ID.
Parameters
----------
celex_id : str
The CELEX ID to find HTML for.
language : str
The language to retrieve the HTML in (default: "en").
Returns
-------
Expand All @@ -370,7 +375,7 @@ def get_html_by_celex_id(celex_id: str) -> str:
allow_redirects=True,
headers={ # pragma: no cover
"Accept": "text/html,application/xhtml+xml,application/xml", # pragma: no cover
"Accept-Language": "en", # pragma: no cover
"Accept-Language": f"{language}", # pragma: no cover
},
) # pragma: no cover
html = response.content.decode("utf-8") # pragma: no cover
Expand Down

0 comments on commit d8de40c

Please sign in to comment.