-
Notifications
You must be signed in to change notification settings - Fork 12
/
app.py
110 lines (89 loc) · 3.66 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from __future__ import unicode_literals
from flask import Flask,render_template,url_for,request
from spacy_summarization import text_summarizer
from gensim.summarization import summarize
from nltk_summarization import nltk_summarizer
import time
import spacy
nlp = spacy.load("en_core_web_sm")
app = Flask(__name__)
# Web Scraping Pkg
from bs4 import BeautifulSoup
from urllib.request import urlopen
# Sumy Pkg
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer
# Sumy
def sumy_summary(docx):
parser = PlaintextParser.from_string(docx,Tokenizer("english"))
lex_summarizer = LexRankSummarizer()
summary = lex_summarizer(parser.document,3)
summary_list = [str(sentence) for sentence in summary]
result = ' '.join(summary_list)
return result
# Reading Time
def readingTime(mytext):
total_words = len([ token.text for token in nlp(mytext)])
estimatedTime = total_words/200.0
return estimatedTime
# Fetch Text From Url
def get_text(url):
page = urlopen(url)
soup = BeautifulSoup(page)
fetched_text = ' '.join(map(lambda p:p.text,soup.find_all('p')))
return fetched_text
@app.route('/')
def index():
return render_template('index.html')
@app.route('/analyze',methods=['GET','POST'])
def analyze():
start = time.time()
if request.method == 'POST':
rawtext = request.form['rawtext']
final_reading_time = readingTime(rawtext)
final_summary = text_summarizer(rawtext)
summary_reading_time = readingTime(final_summary)
end = time.time()
final_time = end-start
return render_template('index.html',ctext=rawtext,final_summary=final_summary,final_time=final_time,final_reading_time=final_reading_time,summary_reading_time=summary_reading_time)
@app.route('/analyze_url',methods=['GET','POST'])
def analyze_url():
start = time.time()
if request.method == 'POST':
raw_url = request.form['raw_url']
rawtext = get_text(raw_url)
final_reading_time = readingTime(rawtext)
final_summary = text_summarizer(rawtext)
summary_reading_time = readingTime(final_summary)
end = time.time()
final_time = end-start
return render_template('index.html',ctext=rawtext,final_summary=final_summary,final_time=final_time,final_reading_time=final_reading_time,summary_reading_time=summary_reading_time)
@app.route('/compare_summary')
def compare_summary():
return render_template('compare_summary.html')
@app.route('/comparer',methods=['GET','POST'])
def comparer():
start = time.time()
if request.method == 'POST':
rawtext = request.form['rawtext']
final_reading_time = readingTime(rawtext)
final_summary_spacy = text_summarizer(rawtext)
summary_reading_time = readingTime(final_summary_spacy)
# Gensim Summarizer
final_summary_gensim = summarize(rawtext)
summary_reading_time_gensim = readingTime(final_summary_gensim)
# NLTK
final_summary_nltk = nltk_summarizer(rawtext)
summary_reading_time_nltk = readingTime(final_summary_nltk)
# Sumy
final_summary_sumy = sumy_summary(rawtext)
summary_reading_time_sumy = readingTime(final_summary_sumy)
end = time.time()
final_time = end-start
return render_template('compare_summary.html',ctext=rawtext,final_summary_spacy=final_summary_spacy,final_summary_gensim=final_summary_gensim,final_summary_nltk=final_summary_nltk,final_time=final_time,final_reading_time=final_reading_time,summary_reading_time=summary_reading_time,summary_reading_time_gensim=summary_reading_time_gensim,final_summary_sumy=final_summary_sumy,summary_reading_time_sumy=summary_reading_time_sumy,summary_reading_time_nltk=summary_reading_time_nltk)
@app.route('/about')
def about():
return render_template('index.html')
if __name__ == '__main__':
app.run(debug=True)