-
Notifications
You must be signed in to change notification settings - Fork 3
/
searchFunction.py
93 lines (77 loc) · 2.42 KB
/
searchFunction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import requests
from ml import svm
import json
import NLProcessor as nlp
import lxml.html
from requests import get
from goose import Goose
def getSuggestions(query, limit=3):
    """Return up to ``limit`` search suggestions for ``query`` from Bing.

    Sends a GET request to the Bing v5.0 suggestions endpoint and collects
    the ``query`` text of every entry found under
    ``suggestionGroups`` -> ``searchSuggestions``, in response order.

    Args:
        query: Search text to request suggestions for.
        limit: Maximum number of suggestions to return. Defaults to 3, the
            cap that used to be hard-coded.

    Returns:
        A list of at most ``limit`` suggestion strings with non-ASCII
        characters dropped. The list is empty when the response carries no
        suggestion groups (for example an error payload).
    """
    url = 'https://api.cognitive.microsoft.com/bing/v5.0/suggestions/'
    # NOTE(review): the subscription key is hard-coded in source; consider
    # loading it from configuration instead.
    headers = {'Ocp-Apim-Subscription-Key': '854e8088bb8347418e6f934b996487af'}
    # Hand the query to requests via ``params`` so it gets URL-encoded;
    # concatenating it into the URL breaks on characters such as '&' or '#'.
    r = requests.get(url, params={'q': query}, headers=headers)
    results = []
    for group in r.json().get('suggestionGroups', []):
        for term in group.get('searchSuggestions', []):
            if len(results) >= limit:
                # Cap reached: stop scanning the remaining groups as well.
                return results
            text = term['query'].encode("ascii", "ignore")
            if not isinstance(text, str):
                # On Python 3 encode() yields bytes and str(bytes) would give
                # the "b'...'" repr, so decode back to text instead.
                text = text.decode("ascii")
            results.append(text)
    return results
def manualSearch(query, limit=5):
    """Search Bing News for ``query`` and score the returned article links.

    Requests articles from the last week from the Bing v5.0 news search
    endpoint, looks at the first ``limit`` entries of the response's
    ``value`` list and passes each kept article URL to ``svm.compute``.
    An entry that has a ``clusteredArticles`` key is represented by that
    value; entries whose representation is not a dict are skipped but still
    count toward ``limit``.

    Args:
        query: Search text.
        limit: Number of leading articles to consider. Defaults to 5, the
            cap that used to be hard-coded.

    Returns:
        A JSON string encoding a list of objects with the keys ``id``
        (1-based position among the kept articles, as a string),
        ``description`` (non-ASCII characters dropped), ``url`` and ``score``
        (``str`` of the ``svm.compute`` result).
        When the response is not valid JSON or has no ``value`` entry, the
        empty list ``[]`` is returned instead of a JSON string; this is kept
        for backward compatibility with existing callers.
    """
    # The query is sent through ``params`` only. It used to be appended to
    # the URL as well, so requests transmitted ``q`` twice.
    url = 'https://api.cognitive.microsoft.com/bing/v5.0/news/search'
    payload = {'q': query, 'freshness': 'Week'}
    # NOTE(review): the subscription key is hard-coded in source; consider
    # loading it from configuration instead.
    headers = {'Ocp-Apim-Subscription-Key': '22207001cbdc4c2487ad91d1cec1bdf2'}
    r = requests.get(url, params=payload, headers=headers)
    try:
        listOfArticles = r.json()['value']
    except (KeyError, TypeError, ValueError):
        # ValueError: body is not JSON; KeyError/TypeError: no 'value' entry.
        return []

    fin = []
    for position, article in enumerate(listOfArticles):
        if position >= limit:
            break
        if 'clusteredArticles' in article:
            information = article['clusteredArticles']
        else:
            information = article
        if not isinstance(information, dict):
            continue
        link = information['url']
        description = information['description'].encode("ascii", "ignore")
        if not isinstance(description, str):
            # On Python 3 encode() yields bytes and str(bytes) would give the
            # "b'...'" repr, so decode back to text instead.
            description = description.decode("ascii")
        fin.append({
            'id': str(len(fin) + 1),
            'description': description,
            'url': link,
            'score': str(svm.compute(link)),
        })
    return json.dumps(fin)
def processURL(url):
    """Score the page at ``url`` and extract its title and keywords.

    The page is downloaded once; the same response body is used to read the
    ``<title>`` element and to run Goose article extraction. The cleaned
    article text is then passed to ``nlp.generateEntity``.

    Args:
        url: Address of the page to process.

    Returns:
        A JSON string encoding an object with the keys ``title`` (``null``
        when the page has no ``<title>`` element), ``score`` (the
        ``svm.compute(url)`` result), ``keywords`` (the
        ``nlp.generateEntity`` result) and ``url``.
    """
    toReturn = {}
    score = svm.compute(url)
    # Fetch the page a single time. Previously lxml.html.parse(url) made its
    # own download through libxml2's URL loader (which, as far as known, does
    # not handle HTTPS) before requests fetched the same page again.
    response = get(url)
    document = lxml.html.document_fromstring(response.content)
    # findtext() returns None instead of raising when <title> is missing.
    title = document.findtext(".//title")
    extractor = Goose()
    article = extractor.extract(raw_html=response.content)
    keywords = nlp.generateEntity(article.cleaned_text)
    toReturn['title'] = title
    toReturn['score'] = score
    toReturn['keywords'] = keywords
    toReturn['url'] = url
    return json.dumps(toReturn)