-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
129 lines (100 loc) · 5.11 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
from colorama import Fore, Style, init
from tqdm import tqdm
# Set up colorama once at import time, before any of the Fore/Style colour
# codes used in the log messages below are emitted.
init()
# Lower-case file extensions that are treated as downloadable images.
_IMAGE_EXTENSIONS = (
    '.jpg', '.jpeg', '.png', '.gif', '.tiff', '.bmp', '.ico', '.psd', '.ai',
    '.apng', '.avif', '.jfif', '.pjpeg', '.pjp', '.webp',
)

# Seconds to wait on a connect/read before giving up on a request, so one
# unresponsive server cannot hang the whole run.
_REQUEST_TIMEOUT = 30


def _resolve_image_url(page_url, src):
    """Return the absolute http(s) URL for an ``<img src>``, or None if unusable."""
    if not src or not src.strip():
        return None
    try:
        # urljoin resolves every relative form ("/a.png", "img/a.png",
        # "//cdn/a.png") and leaves absolute URLs untouched.
        absolute = urljoin(page_url, src.strip())
        scheme = urlparse(absolute).scheme
    except ValueError:
        # Malformed URL (e.g. a broken IPv6 literal).
        return None
    # Rejects data:, javascript:, blob: and similar non-fetchable sources.
    if scheme not in ('http', 'https'):
        return None
    return absolute


def _looks_like_image(image_url):
    """Return True if *image_url* carries a known image extension."""
    # Test the path component so a query string or fragment
    # ("photo.png?v=2") does not hide the extension; compare case-insensitively
    # so "PHOTO.JPG" is accepted.  The whole-URL test is kept as a fallback so
    # everything the previous rule accepted is still accepted.
    url_path = urlparse(image_url).path.lower()
    return (url_path.endswith(_IMAGE_EXTENSIONS)
            or image_url.lower().endswith(_IMAGE_EXTENSIONS))


def download_images(url, path):
    """Download every ``<img>`` on the page at *url* into ``path/<domain>``.

    Progress and errors are not printed; they are collected as colourised
    strings in the module-level ``log_messages`` list, which is reset at the
    start of every call and is meant to be read by the caller afterwards.

    Args:
        url: Address of the HTML page to scan.
        path: Base directory; a sub-directory named after the URL's network
            location is created inside it to hold the images.

    Returns:
        None. Network, filesystem and malformed-URL errors are caught and
        recorded in ``log_messages`` instead of being raised.
    """
    # Clear previous logs
    global log_messages
    log_messages = []

    non_image_files = []  # Resolved URLs skipped for lacking an image extension

    # Extract domain name from the url and create a new path
    try:
        domain_name = urlparse(url).netloc
    except ValueError as err:
        log_messages.append(Fore.RED + f"Error during download: {err}" + Style.RESET_ALL)
        return
    path = os.path.join(path, domain_name)

    try:
        # Fetch and parse the page
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()  # Raise an exception if the GET request was unsuccessful
        log_messages.append(Fore.GREEN + f"Successfully retrieved HTML content from {url}" + Style.RESET_ALL)

        soup = BeautifulSoup(response.content, 'html.parser')
        images = soup.find_all('img')
        log_messages.append(Fore.GREEN + f"Found {len(images)} image tags on the page" + Style.RESET_ALL)

        # Create the directory if it doesn't already exist
        if not os.path.exists(path):
            # exist_ok guards against the directory appearing between the
            # check above and this call.
            os.makedirs(path, exist_ok=True)
            log_messages.append(Fore.GREEN + f"Created directory: {path}" + Style.RESET_ALL)
        else:
            log_messages.append(
                Fore.YELLOW + f"Directory already exists: {path}. Images will be downloaded into this directory." +
                Style.RESET_ALL)

        # Download each image
        for i, image in enumerate(tqdm(images, desc="Downloading images", unit="image", position=0, leave=True)):
            image_url = image.get('src')
            full_image_url = _resolve_image_url(url, image_url)
            if full_image_url is None:
                log_messages.append(
                    Fore.RED + f"Skipping image {i + 1} due to invalid URL: {image_url}" + Style.RESET_ALL)
                continue

            if not _looks_like_image(full_image_url):
                non_image_files.append(full_image_url)
                log_messages.append(
                    Fore.RED + f"Skipping non-image file {i + 1}: {full_image_url}" + Style.RESET_ALL)
                continue

            try:
                with requests.get(full_image_url, stream=True, timeout=_REQUEST_TIMEOUT) as image_response:
                    image_response.raise_for_status()
                    image_content = image_response.content

                # Name the file after the last path segment of the URL
                image_name = urlparse(full_image_url).path.split('/')[-1]
                # If the image name is empty, use a default name
                if not image_name.strip():
                    image_name = f'image_{i}.jpg'

                with open(os.path.join(path, image_name), 'wb') as f:
                    f.write(image_content)
                log_messages.append(Fore.GREEN + f"{image_name} saved successfully" + Style.RESET_ALL)
            except requests.exceptions.RequestException as err:
                log_messages.append(Fore.RED + f"Error downloading image {i + 1}: {err}" + Style.RESET_ALL)
            except OSError as err:
                # e.g. a file name the local filesystem rejects; keep going
                # with the remaining images instead of aborting the run.
                log_messages.append(Fore.RED + f"Error saving image {i + 1}: {err}" + Style.RESET_ALL)

        log_messages.append(
            Fore.GREEN + f"\nDownload completed. {len(non_image_files)} non-image files were skipped." + Style.RESET_ALL)
    except (requests.exceptions.RequestException, OSError) as err:
        # OSError covers a failure to create the target directory.
        log_messages.append(Fore.RED + f"Error during download: {err}" + Style.RESET_ALL)
# Main function
def main():
    """Run the interactive prompt loop.

    Repeatedly asks for a page URL, downloads its images into the ``images``
    directory via ``download_images`` and prints the messages that call left
    in the module-level ``log_messages`` list.  The loop ends when the user
    types ``exit`` (any case, surrounding whitespace ignored), presses
    Ctrl+C, or the input stream is closed.
    """
    while True:
        try:
            # Strip so that "exit " or a URL pasted with a trailing space or
            # newline is still handled as intended.
            url = input("Enter the URL (or 'exit' or Ctrl+C to quit): ").strip()
            if url.lower() == 'exit':
                break
            path = 'images'
            download_images(url, path)
            for message in log_messages:
                print(message)
        except KeyboardInterrupt:
            print("\nProgram terminated by user (Ctrl+C).")
            break
        except EOFError:
            # input() raises EOFError when stdin is closed or exhausted
            # (Ctrl+D, piped input); exit cleanly instead of with a traceback.
            print("\nInput stream closed. Exiting.")
            break
# Script entry point: start the interactive downloader only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()