-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
52 lines (42 loc) · 1.74 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#Tanishka Ghosh
#Created: 2024-01-11
#Last Updated: 2024-01-11
#Web scraper that takes a word or phrase from the user, searches for it
#on Wikipedia, and outputs the first paragraph as a summary of the topic.
#If there is an error, or if there is more than one possible entry for
#the search term, the first possible entry listed on the error page is
#used.
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
#greet the user, then read the topic they want summarised from stdin
greeting = "Hello! What would you like to learn more about?"
print(greeting)
word = input()
def _first_link_title(body):
    """Return the ``title`` of the first link inside the first ``<ul>`` of ``body``.

    Returns ``None`` when there is no list, no link in it, or the link has
    no ``title`` attribute.
    """
    first_list = body.find("ul")
    if first_list is None:
        return None
    first_link = first_list.find("a")
    if first_link is None:
        return None
    return first_link.get("title")


def _lookup_summary(word, max_hops=5):
    """Return the summary text for ``word``, or an error message.

    Fetches ``https://en.wikipedia.org/wiki/<word>`` and returns the text of
    the first ``<p>`` in the main content ``<div>`` that is longer than 40
    characters. If a paragraph containing "refer to" is seen first, the page
    is treated as a list of candidate articles and the first linked title is
    looked up instead, at most ``max_hops`` pages in total.

    Returns an empty string when the page has neither kind of paragraph, and
    the error message when the request fails, the expected page elements are
    missing, or ``max_hops`` is exhausted.
    """
    base_url = "https://en.wikipedia.org/wiki/"
    error_message = "There seems to be an error, try a different word"

    title = word
    # A bounded loop replaces unbounded recursion, so a chain of pages that
    # keep pointing at each other cannot run forever.
    for _ in range(max_hops):
        # Wikipedia URLs use underscores for spaces. Percent-encode the rest
        # so characters such as '#' or '?' stay part of the title instead of
        # being parsed as a URL fragment or query string.
        url = base_url + quote(title.replace(" ", "_"))
        try:
            # Without a timeout, a stalled connection would hang forever.
            response = requests.get(url, timeout=10)
        except requests.RequestException:
            return error_message

        soup = BeautifulSoup(response.content, "html.parser")
        body = soup.find("div", {"class": "mw-content-ltr mw-parser-output"})
        if body is None:
            return error_message

        for para in body.find_all("p"):
            text = para.text
            if "refer to" in text:
                # Several candidate articles: stop scanning and follow one.
                break
            if len(text) > 40:
                # First paragraph with actual content (more than 40 chars).
                return text
        else:
            # No usable paragraph and nothing to follow.
            return ""

        title = _first_link_title(body)
        if not title:
            return error_message

    return error_message


def find(word):
    """Print a one-paragraph Wikipedia summary for ``word``.

    ``word`` is the search term as typed by the user; spaces are allowed.
    Exactly one line of output is printed per call: the summary, an empty
    line if the page has no suitable paragraph, or an error message.
    Returns ``None``.
    """
    print(_lookup_summary(word))
#look up the word the user typed in; find() prints the summary itself
find(word)