-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
128 lines (107 loc) · 4.83 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import re
import requests
from bs4 import BeautifulSoup
def fetch_html_content(url, timeout=10):
    """Download a page and return its body as text.

    The request is sent with a desktop-browser ``User-Agent`` header.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it a stalled connection
            would block the script indefinitely.

    Returns:
        The decoded response body, or ``None`` if the request failed
        (connection error, timeout, or a non-success HTTP status).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Turn 4xx/5xx answers into exceptions so they share the error path.
        response.raise_for_status()
        return response.text
    except requests.RequestException as e:
        # Best-effort fetch: report the problem and let the caller decide.
        print(f"An error occurred while fetching the URL: {e}")
        return None
def _extract_professionals(movie_item):
    """Collect ``{'name', 'role'}`` dicts from every ``div.prof`` in the item."""
    professionals = []
    for prof_elem in movie_item.find_all('div', class_='prof'):
        name_elem = prof_elem.find('p')
        role_elem = prof_elem.find('label')
        # Entries missing either the name or the role are dropped.
        if name_elem and role_elem:
            professionals.append({
                'name': name_elem.text.strip(),
                'role': role_elem.text.strip(),
            })
    return professionals


def _extract_ratings(movie_item):
    """Map rating label -> rating text from the item's ``ul.average-rating``."""
    ratings = {}
    ratings_ul = movie_item.find('ul', class_='average-rating')
    if ratings_ul:
        for rating_elem in ratings_ul.find_all('li'):
            label_elem = rating_elem.find('label')
            value_elem = rating_elem.find('p')
            if label_elem and value_elem:
                ratings[label_elem.text.strip()] = value_elem.text.strip()
    return ratings


def _extract_extras(movie_item):
    """Return the external links found in the item's ``div.extras``.

    A link whose text contains 'Wiki' is stored under the ``'imdb'`` key and
    one whose text contains 'Trailer' under ``'trailer'``. Anchors without an
    ``href`` attribute are ignored instead of raising ``KeyError``.
    """
    extras = {}
    extras_elem = movie_item.find('div', class_='extras')
    if extras_elem:
        for link in extras_elem.find_all('a'):
            href = link.get('href')
            if href is None:
                continue
            if 'Wiki' in link.text:
                extras['imdb'] = href
            elif 'Trailer' in link.text:
                extras['trailer'] = href
    return extras


def _parse_movie_item(movie_item):
    """Build the movie dict for one ``<li>``; return ``None`` if it has no title."""
    title_elem = movie_item.find('a', class_='title')
    if not (title_elem and title_elem.h3):
        return None

    movie = {'title': title_elem.h3.text.strip()}

    # The year is the first 4-digit run in the info paragraph, when present.
    info_elem = movie_item.find('div', class_='info')
    if info_elem and info_elem.p:
        year_match = re.search(r'\d{4}', info_elem.p.text.strip())
        if year_match:
            movie['year'] = year_match.group()

    synopsis_elem = movie_item.find('p', class_='synopsis')
    if synopsis_elem:
        movie['synopsis'] = synopsis_elem.text.strip()

    movie['professionals'] = _extract_professionals(movie_item)
    movie['ratings'] = _extract_ratings(movie_item)
    movie['extras'] = _extract_extras(movie_item)
    return movie


def extract_movie_details(html_content):
    """Parse a results page into a list of movie dictionaries.

    Args:
        html_content: HTML text of the results page.

    Returns:
        A list of dicts, one per ``<li>`` that contains an ``a.title`` with an
        ``<h3>``. Each dict always has ``'title'``, ``'professionals'`` (list
        of ``{'name', 'role'}``), ``'ratings'`` (label -> value) and
        ``'extras'`` (``'imdb'`` / ``'trailer'`` links); ``'year'`` and
        ``'synopsis'`` are present only when found in the markup.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    movies = []
    # The class filter is meant to leave out the 'adspace-lb' ad placeholders.
    for movie_item in soup.find_all('li', class_=lambda x: x != 'adspace-lb'):
        movie = _parse_movie_item(movie_item)
        if movie is None:
            continue  # No title: not a movie entry (e.g. a nested rating <li>).
        movies.append(movie)
    return movies
from datetime import datetime
def write_to_readme(movies, path='README.md'):
    """Render the movie list as a Markdown watch list.

    The file starts with a title line carrying today's date, followed by one
    section per movie: a numbered heading with title, year and any
    IMDb/Trailer links, the synopsis, and the professionals grouped by role.
    Sections are separated by a horizontal rule.

    Args:
        movies: Iterable of movie dicts as produced by
            ``extract_movie_details``. Empty dicts are skipped, but still
            consume a position in the numbering.
        path: Destination file, overwritten if it exists. Defaults to
            ``'README.md'`` in the current working directory.
    """
    updated_date = datetime.now().strftime("%Y-%m-%d")
    with open(path, 'w', encoding='utf-8') as f:
        f.write(f"# Watch List (Updated: {updated_date})\n\n")
        for idx, movie in enumerate(movies, 1):
            if not movie:
                continue  # Only non-empty movie dictionaries are written.

            f.write(f"### {idx}. **{movie.get('title', 'Unknown Title')}** ({movie.get('year', 'N/A')}) ")

            # Append the IMDb and Trailer links to the heading when available.
            extras = movie.get('extras', {})
            links = []
            if 'imdb' in extras:
                links.append(f"[IMDb]({extras['imdb']})")
            if 'trailer' in extras:
                links.append(f"[Trailer]({extras['trailer']})")
            f.write(', '.join(links))
            f.write("\n\n")

            f.write(f"- **Synopsis**: {movie.get('synopsis', 'N/A')}\n\n")

            # Group names under their role, keeping first-seen role order.
            names_by_role = {}
            for prof in movie.get('professionals', []):
                names_by_role.setdefault(prof['role'], []).append(prof['name'])
            for role, names in names_by_role.items():
                f.write(f"- **{role}**: {', '.join(names)}\n")

            f.write("\n---\n\n")
# Script body: fetch the results page, parse it, and regenerate README.md.
url = "https://einthusan.tv/movie/results/?find=Popularity&lang=malayalam&ptype=view&tp=l30d"
html_content = fetch_html_content(url)
if not html_content:
    # Nothing usable came back (fetch error or empty body).
    print("Failed to fetch HTML content. Please check the URL and your internet connection.")
else:
    movie_details = extract_movie_details(html_content)
    write_to_readme(movie_details)
    print("Movie details have been written to README.md")