Skip to content

Commit

Permalink
0.3.0 Meilleure gestion des sous-répertoires.
Browse files Browse the repository at this point in the history
  • Loading branch information
user.name committed Nov 19, 2020
1 parent 0f8f144 commit cfe5877
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 39 deletions.
3 changes: 3 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## epubpub_get.py

### Version 0.03.0 (2020-11-19)
- [CHANGE] Meilleure gestion des sous-dossiers pour certains epub.

### Version 0.02.1 (2020-11-19)
- [CHANGE] Plante quand il y a trop d'erreurs 404 sur le même fichier, afin d'éviter d'avoir un ePub incomplet.

Expand Down
84 changes: 45 additions & 39 deletions epubpub_get.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
__version__ = "0.02.1"
__version__ = "0.03.0"
"""
Source : https://github.com/izneo-get/epubpub-get
Expand All @@ -18,7 +18,7 @@


def requests_retry_session(
retries=10,
retries=5,
backoff_factor=1,
status_forcelist=(401, 402, 403, 404, 500, 502, 504),
session=None,
Expand All @@ -37,7 +37,12 @@ def requests_retry_session(
session.mount("https://", adapter)
return session


if __name__ == "__main__":
base_url_to_remove = (
"https://asset.epub.pub/epub/" # La partie à supprimer pour l'arborescence.
)
output_folder = "DOWNLOADS"

# Récupération de l'URL du livre souhaité (si pas en argument, on le demande).
requested_url = ""
Expand Down Expand Up @@ -86,13 +91,6 @@ def requests_retry_session(
os.system("pause")
sys.exit()
print(" OK")
output_folder = "DOWNLOADS"
if not os.path.exists(output_folder):
os.mkdir(output_folder)

output_folder = output_folder + "/" + requested_url.split("/")[-1]
if not os.path.exists(output_folder):
os.mkdir(output_folder)

remove_sponsor = True

Expand All @@ -101,8 +99,12 @@ def requests_retry_session(
print("Format inattendu...")
os.system("pause")
sys.exit()

with open(output_folder + "/content.opf", "wb") as f:

# Création du répertoire de destination.
file_name = url.replace(base_url_to_remove, "")
os.makedirs(os.path.dirname(output_folder + "/" + file_name), exist_ok=True)

with open(output_folder + "/" + file_name, "wb") as f:
f.write(response.content)
soup = BeautifulSoup(response.text, "html.parser")

Expand All @@ -123,19 +125,13 @@ def requests_retry_session(
if response.status_code == 200:
print(" OK")
# Création du répertoire de destination.
folders = file_name.split("/")
mid_path = ""
for elem in folders[:-1]:
mid_path += elem
if not os.path.exists(output_folder + "/" + mid_path):
os.mkdir(output_folder + "/" + mid_path)
mid_path += "/"

file_name = src.replace(base_url_to_remove, "")
os.makedirs(os.path.dirname(output_folder + "/" + file_name), exist_ok=True)
to_write = response.content
if remove_sponsor and file_name.split(".")[-1].lower() in (
"html",
"htm",
"xhtml"
"xhtml",
):
to_write = re.sub(
r"<div id=\"sponsor\">(.+?)</div>", "", response.text
Expand All @@ -150,7 +146,7 @@ def requests_retry_session(
# On récupère en plus les fichiers supplémentaires.
for e in ["mimetype", "META-INF/container.xml"]:
file_name = e
new_url_base = url_base.split(".epub/")[0] + '.epub/'
new_url_base = url_base.split(".epub/")[0] + ".epub/"
src = new_url_base + file_name
print(src, end="")
try:
Expand All @@ -162,19 +158,13 @@ def requests_retry_session(
if response.status_code == 200:
print(" OK")
# Création du répertoire de destination.
folders = file_name.split("/")
mid_path = ""
for elem in folders[:-1]:
mid_path += elem
if not os.path.exists(output_folder + "/" + mid_path):
os.mkdir(output_folder + "/" + mid_path)
mid_path += "/"

file_name = src.replace(base_url_to_remove, "")
os.makedirs(os.path.dirname(output_folder + "/" + file_name), exist_ok=True)
to_write = response.content
if remove_sponsor and file_name.split(".")[-1].lower() in (
"html",
"htm",
"xhtml"
"xhtml",
):
to_write = re.sub(
r"<div id=\"sponsor\">(.+?)</div>", "", response.text
Expand All @@ -186,18 +176,34 @@ def requests_retry_session(
print(f" Erreur {response.status_code}")
total_errors = total_errors + 1

if os.path.isfile(output_folder + ".epub"):
print("Problème : '" + output_folder + ".epub' existe déjà.")
print("On s'arrête là.")
epub_name = url_base.split(".epub/")[0].replace(base_url_to_remove, "") + ".epub"

# On vérifie si le fichier epub existe.
if os.path.isfile(output_folder + "/" + epub_name):
print(
"Problème : le fichier '"
+ output_folder
+ "/"
+ epub_name
+ "' existe déjà."
)
print("On s'arrête là. Les fichiers temporaires sont conservés mais aucun epub n'a été compilé.")
else:
print("Création de l'ePub", end="")
shutil.make_archive(output_folder, "zip", output_folder)
print(" OK")
os.rename(output_folder + ".zip", output_folder + ".epub")
if total_errors == 0:
shutil.rmtree(output_folder)
print("Création de l'ePub", end="")
shutil.make_archive(
output_folder + "/" + epub_name, "zip", output_folder + "/" + epub_name
)
shutil.rmtree(output_folder + "/" + epub_name)
os.rename(
output_folder + "/" + epub_name + ".zip",
output_folder + "/" + epub_name,
)
print(" OK")
else:
print(f"Il y a eu {total_errors} erreurs.")
print(
f"Il y a eu {total_errors} erreur(s). Les fichiers temporaires sont conservés mais aucun epub n'a été compilé."
)

# Pause pour que l'utilisateur ait le temps de lire la sortie.
os.system("pause")

0 comments on commit cfe5877

Please sign in to comment.