forked from AlexPoulsen/ninsheetmusic_scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathninsheetmusic.py
executable file
·76 lines (70 loc) · 1.71 KB
/
ninsheetmusic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import errno
import os
import pathlib
import re
import sys
import urllib
import urllib.request

import requests
def get_filename_from_cd(cd, counter):
    """
    Get filename from content-disposition

    :param cd: raw ``Content-Disposition`` header value, or ``None``/empty
        when the response carried no such header.
    :param counter: running download number, used to build the
        ``unnamed_<counter>.mid`` fallback name.
    :return: ``None`` when ``cd`` is missing or empty (the caller treats
        that as a failed name lookup); otherwise the bare file name found
        in the header, or the fallback name when the header holds no
        usable ``filename=`` value.
    """
    if not cd:
        return None
    fallback = "unnamed_" + str(counter) + ".mid"
    match = re.search(r'filename=(.+)', cd)
    if match is None:
        return fallback
    value = match.group(1).strip()
    if '"' in value:
        # Quoted form: keep the text between the first and the last quote.
        out = value.split('"', 1)[1].rsplit('"', 1)[0]
    else:
        # Unquoted token: it ends at the next parameter separator, if any.
        # (Indexing the quote split here used to raise IndexError.)
        out = value.split(";", 1)[0].strip()
    # The header is controlled by the remote server; keep only the last path
    # component so the name cannot point outside the download folder.
    out = out.replace("\\", "/").rsplit("/", 1)[-1]
    if not out or out in (".", ".."):
        return fallback
    return out
# Build the list of download URLs, one per numeric id from 1 to 4129, while
# drawing a progress bar: one "|" per URL, wrapped after every 120 entries.
dl = []
print("4129 ", end="")
for counter in range(1, 4130):
    dl.append(f"https://www.ninsheetmusic.org/download/mid/{counter}")
    print("|", end="")
    if not counter % 120:
        print("\n", end=" ")
# Leave the counter one past the last id, exactly as a manual increment at
# the end of each iteration would.
counter += 1
print("\n")
# Directory that receives every downloaded file, relative to the current
# working directory.
path = pathlib.Path(".") / "ninsheetmusic_sources"
# exist_ok tolerates a directory left over from an earlier run, but mkdir
# still raises FileExistsError when the name is taken by something that is
# not a directory, so that problem surfaces here instead of at the first
# download.
path.mkdir(parents=True, exist_ok=True)
# Download every URL in ``dl`` into ``path``.  Each URL is first opened to
# read its Content-Disposition header, which supplies the target file name;
# URLs without that header are not downloaded here but queued in ``failed``.
# NOTE: network/HTTP errors are not caught, so a single failing URL aborts
# the whole run.
counter = 1
# Entries are [url, target path] pairs, ready to be unpacked into
# urllib.request.urlretrieve by the retry step.
failed = []
print("downloading files")
print(" ", end="")
for file in dl:
    # Use the response as a context manager so the probe connection is
    # closed before the actual download starts.
    with urllib.request.urlopen(file) as response:
        new_filename = get_filename_from_cd(
            response.headers.get('content-disposition'), counter
        )
    if new_filename is not None:
        urllib.request.urlretrieve(file, str(path / new_filename))
    else:
        # pathlib.Path has no "+" operator (it raised TypeError); join the
        # fallback name with "/" instead.
        failed.append([file, str(path / ("unnamed_" + str(counter) + ".mid"))])
    # Flush so each bar shows up as its file finishes rather than once per
    # 120-file row.
    print("|", end="", flush=True)
    if counter % 120 == 0:
        print("\n", end=" ")
    counter += 1
print("\n")
# Offer to download the queued ``failed`` entries under their fallback names.
# Retrying is the default: only an explicit "n"/"no" skips it, which is why
# the prompt shows [Y/n].  The progress counter continues from the value the
# main download loop left behind.
failed_answer = input("Retry files with a failed name retrieve? [Y/n] ")
if failed_answer.strip().lower() not in ("n", "no"):
    for file in failed:
        # Each entry is a [url, target path] pair.
        urllib.request.urlretrieve(*file)
        # Flush so the bar advances per file instead of per row.
        print("|", end="", flush=True)
        if counter % 120 == 0:
            print("\n", end=" ")
        counter += 1
    print("\n")