# jobbiestesting.py: aggregates remote job listings from several job boards
# into a single RSS feed served by a small Flask app.
import logging

import requests
from bs4 import BeautifulSoup
from feedgen.feed import FeedGenerator
from flask import Flask, make_response

app = Flask(__name__)
urls = [
    'https://www.flexjobs.com/jobs',
    'https://remote.co/remote-jobs',
    'https://workingnomads.co/jobs',
    'https://weworkremotely.com',
    'https://jobspresso.co/browsejobs',
    'https://www.virtualvocations.com/jobs',
    'https://www.freelancer.com/jobs',
    'https://www.skipthedrive.com/jobs',
    'https://www.peopleperhour.com/freelance-jobs',
    'https://www.guru.com/d/jobs',
]
logging.basicConfig(filename='app.log', level=logging.DEBUG)
def scrape_flexjobs(url):
    logging.info("Scraping flexjobs.com")
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    job_listings = []
    job_elems = soup.select('div.job.row.result')
    for job_elem in job_elems:
        job_title_elem = job_elem.select_one('div.col-md-5 > h5 > a')
        job_title = job_title_elem.get_text(strip=True)
        # Listing links are relative, so prepend the site root.
        job_url = f"https://www.flexjobs.com{job_title_elem['href']}"
        company_elem = job_elem.select_one('div.col-md-3 > span > a')
        company = company_elem.get_text(strip=True)
        date_elem = job_elem.select_one('div.col-md-2 > span')
        date = date_elem.get_text(strip=True)
        job_listings.append({
            'title': job_title,
            'company': company,
            'date': date,
            'url': job_url,
        })
    return job_listings

def scrape_remote_co(url):
    logging.info("Scraping remote.co")
    job_listings = []
    response = requests.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "html.parser")
        jobs_container = soup.find("section", class_="jobs")
        if jobs_container is None:
            logging.error("Jobs container not found on remote.co")
            return []
        # Keep the parsed cards separate from the output list so we never
        # append dicts to the collection we are iterating over.
        job_cards = jobs_container.find_all("div", class_="card")
        for job in job_cards:
            job_title = job.find("h2", class_="card-title").text.strip()
            job_url = job.find("a", class_="card-link")["href"]
            company = job.find("h3", class_="card-subtitle").text.strip()
            date = job.find("div", class_="job-date").text.strip()
            job_listings.append({
                'title': job_title,
                'company': company,
                'date': date,
                'url': job_url,
            })
    else:
        logging.error(f"Failed to fetch the remote.co webpage. Status code: {response.status_code}")
    return job_listings

def scrape_working_nomads(url):
    logging.info("Scraping workingnomads.co")
    job_listings = []
    response = requests.get(url)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        job_listings_section = soup.find("section", class_="jobs")
        if job_listings_section is None:
            logging.error("Job listings section not found on workingnomads.co")
            return []
        # Keep the parsed <li> items separate from the output list.
        job_items = job_listings_section.find_all("li")
        for job in job_items:
            title = job.find("h2", class_="title").text.strip()
            company = job.find("span", class_="company").text.strip()
            job_link = job.find("a", class_="job-link")
            if job_link:
                # Listing links are relative, so prepend the site root.
                job_url = "https://workingnomads.co" + job_link["href"]
            else:
                job_url = ""
            job_date = job.find("span", class_="date").text.strip()
            job_listings.append({
                'title': title,
                'company': company,
                'date': job_date,
                'url': job_url,
            })
    else:
        logging.error(f"Failed to fetch the workingnomads.co webpage. Status code: {response.status_code}")
    return job_listings

# Scraping functions for the remaining URLs in the list can be added in the
# same style; one hedged template follows below. Remember to call each new
# scraper from main() so its results are aggregated.
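
# A minimal sketch of one more scraper, this time for weworkremotely.com.
# The selectors below ('section.jobs li', 'span.title', 'span.company') are
# assumptions about the site's markup, not verified values; adjust them
# against the live page before relying on this function.
def scrape_weworkremotely(url):
    logging.info("Scraping weworkremotely.com")
    job_listings = []
    response = requests.get(url)
    if response.status_code != 200:
        logging.error(f"Failed to fetch the weworkremotely.com webpage. Status code: {response.status_code}")
        return []
    soup = BeautifulSoup(response.text, 'html.parser')
    # Assumed structure: each posting is an <li> inside a <section class="jobs">.
    for job in soup.select('section.jobs li'):
        title_elem = job.select_one('span.title')
        company_elem = job.select_one('span.company')
        link_elem = job.find('a', href=True)
        if not (title_elem and company_elem and link_elem):
            continue  # skip rows that are not job postings (e.g. "view all" links)
        job_listings.append({
            'title': title_elem.get_text(strip=True),
            'company': company_elem.get_text(strip=True),
            'date': '',  # the listing page does not expose a post date in this sketch
            'url': f"https://weworkremotely.com{link_elem['href']}",
        })
    return job_listings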
def main():
    job_listings = []
    for url in urls:
        if 'flexjobs' in url:
            job_listings += scrape_flexjobs(url)
        elif 'remote.co' in url:
            job_listings += scrape_remote_co(url)
        elif 'workingnomads.co' in url:
            job_listings += scrape_working_nomads(url)
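        # Dispatch for the hedged sketch above; drop this branch if the
        # assumed We Work Remotely selectors have not been verified.
        elif 'weworkremotely' in url:
            job_listings += scrape_weworkremotely(url)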
        # Add more elif blocks here as scrapers for the other sites are written.
    job_listings = [j for j in job_listings if j is not None]
    if len(job_listings) == 0:
        logging.warning("No job listings found.")
        return ""
    fg = FeedGenerator()
    fg.title("Remote Job Listings")
    fg.description("Aggregated remote job listings from various websites.")
    fg.link(href="http://localhost:5000/rss")
    for job in job_listings:
        fe = fg.add_entry()
        fe.title(job['title'])
        fe.link(href=job['url'])
        fe.description(f"Company: {job['company']} | Date: {job['date']}")
    rssfeed = fg.rss_str(pretty=True)
    # Keep a copy of the feed on disk in addition to serving it over HTTP.
    with open("remote_job_listings.xml", "wb") as f:
        f.write(rssfeed)
    return rssfeed

@app.route('/rss')
def rss():
    try:
        rssfeed = main()
    except Exception as e:
        logging.exception("Error occurred while generating RSS feed:")
        return f"An error occurred: {str(e)}", 500
    response = make_response(rssfeed)
    response.headers.set("Content-Type", "application/rss+xml")
    return response

if __name__ == '__main__':
    app.run()
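
# To try this locally (assuming the dependencies are installed, e.g.
# `pip install requests beautifulsoup4 feedgen flask`), run the script and
# fetch the feed from the default Flask port:
#     python jobbiestesting.py
#     curl http://localhost:5000/rss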