-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathviews.py
73 lines (56 loc) · 2.3 KB
/
views.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import requests
from urllib.parse import urljoin
from django.shortcuts import render, redirect
from bs4 import BeautifulSoup as BSoup
from news.models import Headline
from django.http import HttpResponse
from django.views import View
# Turn off urllib3's warning output for this process at import time.
# Presumably this is meant to silence the warning emitted for the
# verify=False requests made in scrape() below -- TODO confirm.
requests.packages.urllib3.disable_warnings()
class HeadlinesView(View):
    """Home page view: re-scrape the news sources, then list the headlines."""

    # NOTE: get() below renders "home.html" directly and does not read this
    # attribute.
    template_name = 'views/home.html'

    def get(self, request, *args, **kwargs):
        """Refresh the stored headlines and render them in reverse order.

        Every GET first calls ``scrape()`` and then loads all ``Headline``
        rows. The rows are handed to the template under ``object_list`` in
        the reverse of the order in which the query returned them.
        """
        # Rebuild the Headline table before reading from it.
        scrape()

        # Materialise the queryset, then flip it in place.
        stored = list(Headline.objects.all())
        stored.reverse()

        return render(request, "home.html", {'object_list': stored})
def _collect_headlines(session, base_url, path, link_class, title_class=None,
                       timeout=10):
    """Fetch one listing page and return its headlines as (title, url) pairs.

    Args:
        session: The ``requests.Session`` used to perform the GET.
        base_url: Site root; the page fetched is ``base_url + path`` and
            relative ``href`` values are resolved against ``base_url``.
        path: Path of the listing page, relative to ``base_url``.
        link_class: ``class`` value identifying the ``<a>`` headline tags.
        title_class: When given, the title is read from the ``<h3>`` with
            this ``class`` inside the anchor; otherwise the anchor's own
            text is used.
        timeout: Seconds to wait for the site before ``requests`` gives up.

    Returns:
        A list of ``(title, absolute_url)`` tuples in page order. Anchors
        lacking an ``href`` (or the expected ``<h3>``) are skipped.
    """
    # NOTE(review): verify=False turns off TLS certificate checking. It is
    # kept so existing behaviour does not change; confirm it is still needed.
    page = session.get(base_url + path, verify=False, timeout=timeout).content
    soup = BSoup(page, "html.parser")

    found = []
    for anchor in soup.find_all('a', class_=link_class):
        href = anchor.get('href')
        if href is None:
            # An anchor without a target cannot become a headline link.
            continue

        if title_class is None:
            title = anchor.getText()
        else:
            heading = anchor.find('h3', class_=title_class)
            if heading is None:
                # Markup differs from what is expected; skip, don't crash.
                continue
            title = heading.getText()

        found.append((title, urljoin(base_url, href)))
    return found


def scrape(timeout=10):
    """Replace all stored headlines with freshly scraped ones.

    Both sources (Motortrend, then The Drive) are downloaded and parsed
    before the database is touched, so a network or parsing error leaves
    the previously stored headlines in place. The delete and the inserts
    then run inside a single transaction.

    Args:
        timeout: Seconds to wait on each HTTP request (passed to
            ``requests``). Without a timeout a stalled site would block
            the caller indefinitely.

    Returns:
        The response produced by ``redirect("../")``.

    Raises:
        requests.RequestException: If a page cannot be fetched in time.
    """
    # Local import keeps this block self-contained.
    from django.db import transaction

    motor_trend = "https://www.motortrend.com/"
    the_drive = "https://www.thedrive.com/"

    # The context manager closes the session's pooled connections.
    with requests.Session() as session:
        scraped = _collect_headlines(
            session, motor_trend, "auto-news/", "_22VtJ", timeout=timeout,
        )
        scraped += _collect_headlines(
            session,
            the_drive,
            "the-war-zone/",
            "MuiBox-root css-3f60fj",
            title_class="MuiTypography-root MuiTypography-h5 css-z31x94",
            timeout=timeout,
        )

    with transaction.atomic():
        # Delete all previous headlines, they may be outdated.
        Headline.objects.all().delete()
        # Fill the database up again, one Headline row per scraped article.
        for title, link in scraped:
            Headline(title=title, url=link).save()

    return redirect("../")