From d6b14083183e8d08858979d4aa26ecb5b9fa0711 Mon Sep 17 00:00:00 2001 From: Henadzi Simonovich <375295716621@yandex.by> Date: Thu, 2 Aug 2018 20:47:14 +0300 Subject: [PATCH 1/4] Create an web-app for comments analysis --- 2230/3/Gemfile | 9 ++ 2230/3/Gemfile.lock | 99 +++++++++++++++++++ 2230/3/README.md | 25 +++++ .../app/controllers/application_controller.rb | 11 +++ 2230/3/app/controllers/articles_controller.rb | 34 +++++++ 2230/3/app/helpers/azure_sender.rb | 47 +++++++++ 2230/3/app/helpers/html_parser.rb | 29 ++++++ 2230/3/app/helpers/json_parser.rb | 36 +++++++ 2230/3/app/models/article.rb | 6 ++ 2230/3/app/models/comment.rb | 5 + 2230/3/app/views/articles/new.htm | 14 +++ 2230/3/app/views/articles/new.slim | 20 ++++ 2230/3/app/views/articles/show.htm | 35 +++++++ 2230/3/app/views/articles/show.slim | 45 +++++++++ 2230/3/app/views/comments/show.htm | 28 ++++++ 2230/3/app/views/comments/show.slim | 37 +++++++ 2230/3/app/views/example.slim | 27 +++++ 2230/3/app/views/index.slim | 35 +++++++ 2230/3/config.ru | 11 +++ 2230/3/config.yml | 1 + 20 files changed, 554 insertions(+) create mode 100644 2230/3/Gemfile create mode 100644 2230/3/Gemfile.lock create mode 100644 2230/3/README.md create mode 100644 2230/3/app/controllers/application_controller.rb create mode 100644 2230/3/app/controllers/articles_controller.rb create mode 100644 2230/3/app/helpers/azure_sender.rb create mode 100644 2230/3/app/helpers/html_parser.rb create mode 100644 2230/3/app/helpers/json_parser.rb create mode 100644 2230/3/app/models/article.rb create mode 100644 2230/3/app/models/comment.rb create mode 100644 2230/3/app/views/articles/new.htm create mode 100644 2230/3/app/views/articles/new.slim create mode 100644 2230/3/app/views/articles/show.htm create mode 100644 2230/3/app/views/articles/show.slim create mode 100644 2230/3/app/views/comments/show.htm create mode 100644 2230/3/app/views/comments/show.slim create mode 100644 2230/3/app/views/example.slim create mode 100644 
2230/3/app/views/index.slim create mode 100644 2230/3/config.ru create mode 100644 2230/3/config.yml diff --git a/2230/3/Gemfile b/2230/3/Gemfile new file mode 100644 index 000000000..1e9d8deba --- /dev/null +++ b/2230/3/Gemfile @@ -0,0 +1,9 @@ +source 'https://rubygems.org/' +gem 'capybara' +gem 'sinatra' +gem 'sinatra-config-file' +gem 'shotgun' +gem 'thin' +gem 'ohm' +gem 'slim' +gem 'pry' diff --git a/2230/3/Gemfile.lock b/2230/3/Gemfile.lock new file mode 100644 index 000000000..9f64efe95 --- /dev/null +++ b/2230/3/Gemfile.lock @@ -0,0 +1,99 @@ +GEM + remote: https://rubygems.org/ + specs: + activesupport (5.2.0) + concurrent-ruby (~> 1.0, >= 1.0.2) + i18n (>= 0.7, < 2) + minitest (~> 5.1) + tzinfo (~> 1.1) + addressable (2.5.2) + public_suffix (>= 2.0.2, < 4.0) + backports (3.11.3) + capybara (3.4.2) + addressable + mini_mime (>= 0.1.3) + nokogiri (~> 1.8) + rack (>= 1.6.0) + rack-test (>= 0.6.3) + xpath (~> 3.1) + coderay (1.1.2) + concurrent-ruby (1.0.5) + daemons (1.2.6) + eventmachine (1.2.7) + hiredis (0.6.1) + i18n (1.0.1) + concurrent-ruby (~> 1.0) + method_source (0.9.0) + mini_mime (1.0.0) + mini_portile2 (2.3.0) + minitest (5.11.3) + multi_json (1.13.1) + mustermann (1.0.2) + nest (3.1.1) + redic + nokogiri (1.8.4) + mini_portile2 (~> 2.3.0) + ohm (3.1.1) + nest (~> 3) + redic (~> 1.5.0) + stal + pry (0.11.3) + coderay (~> 1.1.0) + method_source (~> 0.9.0) + public_suffix (3.0.2) + rack (2.0.5) + rack-protection (2.0.3) + rack + rack-test (1.1.0) + rack (>= 1.0, < 3) + redic (1.5.0) + hiredis + shotgun (0.9.2) + rack (>= 1.0) + sinatra (2.0.3) + mustermann (~> 1.0) + rack (~> 2.0) + rack-protection (= 2.0.3) + tilt (~> 2.0) + sinatra-config-file (1.0) + sinatra-contrib + sinatra-contrib (2.0.3) + activesupport (>= 4.0.0) + backports (>= 2.8.2) + multi_json + mustermann (~> 1.0) + rack-protection (= 2.0.3) + sinatra (= 2.0.3) + tilt (>= 1.3, < 3) + slim (3.0.9) + temple (>= 0.7.6, < 0.9) + tilt (>= 1.3.3, < 2.1) + stal (0.3.0) + redic (~> 1.5) + 
temple (0.8.0) + thin (1.7.2) + daemons (~> 1.0, >= 1.0.9) + eventmachine (~> 1.0, >= 1.0.4) + rack (>= 1, < 3) + thread_safe (0.3.6) + tilt (2.0.8) + tzinfo (1.2.5) + thread_safe (~> 0.1) + xpath (3.1.0) + nokogiri (~> 1.8) + +PLATFORMS + ruby + +DEPENDENCIES + capybara + ohm + pry + shotgun + sinatra + sinatra-config-file + slim + thin + +BUNDLED WITH + 1.16.3 diff --git a/2230/3/README.md b/2230/3/README.md new file mode 100644 index 000000000..f9980a07f --- /dev/null +++ b/2230/3/README.md @@ -0,0 +1,25 @@ +### Описание + +Веб-приложение Onliner Analyzer использует Text Analytics API от Microsoft Azure, чтобы рассчитать коэффициент доброжелательности комментариев для статей onliner.by + +### Установка + +1. Клонируйте или скачайте файлы проекта +2. Зайдите в папку проекта и выполните +```bash +bundle install +sudo apt-get install redis-server +``` + +### Первый запуск + +1. Выполните в каталоге проекта: +```bash +shotgun config.ru +``` + +2. Откройте в браузере страницу `http://localhost:9393` + +### Примечания + +Azure - условно-бесплатная платформа, поэтому анализируется лишь 10 первых комментариев на статью. 
diff --git a/2230/3/app/controllers/application_controller.rb b/2230/3/app/controllers/application_controller.rb
new file mode 100644
index 000000000..3a586be96
--- /dev/null
+++ b/2230/3/app/controllers/application_controller.rb
@@ -0,0 +1,14 @@
+require 'sinatra/config_file'
+
+# Base controller: sets the shared views directory, loads settings from
+# config.yml and serves the landing page. ArticlesController inherits from it.
+class ApplicationController < Sinatra::Base
+  set :views, Proc.new { File.join(root, '../views') }
+  register Sinatra::ConfigFile
+  config_file '../../config.yml'
+
+  # Renders the index view.
+  get '/' do
+    slim :index
+  end
+end
diff --git a/2230/3/app/controllers/articles_controller.rb b/2230/3/app/controllers/articles_controller.rb
new file mode 100644
index 000000000..0232aca18
--- /dev/null
+++ b/2230/3/app/controllers/articles_controller.rb
@@ -0,0 +1,59 @@
+require 'uri'
+require_relative './application_controller'
+
+# Routes for listing analysed articles, submitting a new article URL and
+# showing the comments behind an article's rating.
+class ArticlesController < ApplicationController
+  # Host must be onliner.by itself or one of its subdomains.
+  ONLINER_HOST = /(?:\A|\.)onliner\.by\z/i
+
+  helpers do
+    # Returns true only for http(s) URLs hosted on onliner.by.
+    # A substring check is not enough: the URL is fetched afterwards, so a
+    # string that merely mentions "onliner.by" must be rejected.
+    def onliner_url?(url)
+      uri = URI.parse(url)
+      uri.is_a?(URI::HTTP) && uri.host.to_s.match?(ONLINER_HOST)
+    rescue URI::InvalidURIError
+      false
+    end
+  end
+
+  # index - show list of articles, sorted by rating and then reversed
+  get '/' do
+    @articles = Article.all.sort_by(:rating).reverse!
+    slim :'articles/show'
+  end
+
+  # new - form for adding an article
+  get '/new' do
+    slim :'articles/new'
+  end
+
+  # create - fetch the article's comments, rate them and store the result
+  post '/' do
+    article_url = params[:article_new].to_s.strip
+    redirect 'articles' unless onliner_url?(article_url)
+
+    html_parser = HTMLParser.new(article_url)
+    comments = JSONParser.new(html_parser.run).comments
+    # Nothing to rate: skip the API call and the division by zero below.
+    redirect 'articles' if comments.empty?
+
+    ratings = AzureSender.new(comments, settings.access_key).run
+    redirect 'articles' if ratings.empty?
+
+    article = Article.create(url: article_url, title: html_parser.article_title,
+                             rating: ratings.sum / ratings.size)
+    comments.zip(ratings).each do |text, score|
+      Comment.create(text: text, rating: score, article: article)
+    end
+    redirect 'articles'
+  end
+
+  # show - comments of a single article
+  get '/:id' do
+    @comments = Comment.find(article_id: params[:id])
+    slim :'comments/show'
+  end
+end
diff --git a/2230/3/app/helpers/azure_sender.rb b/2230/3/app/helpers/azure_sender.rb
new file mode 100644
index 000000000..d1fb4ebd8
--- /dev/null
+++ b/2230/3/app/helpers/azure_sender.rb
@@ -0,0 +1,64 @@
+require 'uri'
+require 'net/https'
+require 'json'
+
+# Sends comments to the Azure Text Analytics sentiment endpoint and converts
+# the returned scores into integer ratings.
+class AzureSender
+  # Raised when the service answers with a non-success status or a body
+  # without a "documents" list.
+  class RequestError < StandardError; end
+
+  URL_TO_SERVICE = 'https://westeurope.api.cognitive.microsoft.com/text/analytics/v2.0/sentiment'.freeze
+  attr_reader :data, :uri, :request
+
+  # comments - enumerable of comment texts
+  # api_key  - value sent in the Ocp-Apim-Subscription-Key header
+  def initialize(comments, api_key)
+    @uri = URI(URL_TO_SERVICE)
+    @data = { documents: [] }
+    prepare_data(comments)
+    @request = prepare_request(api_key)
+  end
+
+  # Performs the request and returns one integer rating per returned document.
+  def run
+    prepare_output_data(send_request)
+  end
+
+  private
+
+  # Builds the "documents" payload; each comment's index is used as its id.
+  def prepare_data(comments)
+    comments.each_with_index do |comment, index|
+      @data[:documents] << { 'id' => index.to_s, 'language' => 'ru', 'text' => comment }
+    end
+  end
+
+  # Sends the prepared request and returns the response.
+  def send_request
+    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
+      http.request(request)
+    end
+  end
+
+  # Maps each score s to (s * 200).to_i - 100; raises RequestError when the
+  # response is not a success or carries no "documents" list.
+  def prepare_output_data(response)
+    unless response.is_a?(Net::HTTPSuccess)
+      raise RequestError, "Azure sentiment request failed: #{response.code} #{response.message}"
+    end
+
+    documents = JSON.parse(response.body)['documents']
+    raise RequestError, 'Azure sentiment response has no "documents"' unless documents
+
+    documents.map { |document| (document['score'] * 200).to_i - 100 }
+  end
+
+  # Builds the JSON POST request carrying the documents and the API key header.
+  def prepare_request(api_key)
+    request = Net::HTTP::Post.new(uri, 'Content-Type' => 'application/json', 'Ocp-Apim-Subscription-Key' => api_key)
+    request.body = data.to_json
+    request
+  end
+end
diff --git a/2230/3/app/helpers/html_parser.rb b/2230/3/app/helpers/html_parser.rb
new file mode 100644
index 000000000..72d9db643
--- /dev/null
+++ b/2230/3/app/helpers/html_parser.rb
@@ -0,0 +1,49 @@
+require 'open-uri'
+require 'nokogiri'
+
+# Downloads an article page, remembers its <title> and builds the URL of the
+# comments API for that article.
+class HTMLParser
+  COMMENTS_API_BASE_PATH = 'https://comments.api.onliner.by'.freeze
+  COMMENTS_COUNT = '10'.freeze
+  attr_reader :article_url
+  attr_reader :comments_url, :article_title
+
+  def initialize(article_url)
+    @article_url = article_url
+    @article_title = ''
+  end
+
+  # Fetches the page, stores the title and returns the comments API URL.
+  # Raises ArgumentError when the URL is not http(s) or the page lacks the
+  # `div#fast-comments app` element or one of its attributes.
+  def run
+    page = fetch_page
+    @article_title = page.css('title').text
+    @comments_url = generate_comments_url(page.at_css('div#fast-comments app'))
+  end
+
+  private
+
+  # Kernel#open would run a command for input starting with "|", so the URL
+  # is parsed first and opened through open-uri's URI#open (block form closes it).
+  def fetch_page
+    uri = URI.parse(@article_url)
+    raise ArgumentError, "not an http(s) URL: #{@article_url}" unless uri.is_a?(URI::HTTP)
+
+    uri.open { |io| Nokogiri::HTML(io) }
+  end
+
+  # app - the <app> element whose attributes identify the article
+  def generate_comments_url(app)
+    raise ArgumentError, 'comments element not found on the page' unless app
+
+    segments = %w[project-name entity-type entity-id].map do |name|
+      app[name] || raise(ArgumentError, "comments element has no #{name} attribute")
+    end
+    [COMMENTS_API_BASE_PATH, *segments, "comments?limit=#{COMMENTS_COUNT}"].join('/')
+  end
+end
+
+# url = HTMLParser.new('https://people.onliner.by/2018/07/26/profession').run
+# p url
diff --git a/2230/3/app/helpers/json_parser.rb b/2230/3/app/helpers/json_parser.rb
new file mode 100644
index 000000000..52b407f2b
--- /dev/null
+++ b/2230/3/app/helpers/json_parser.rb
@@ -0,0 +1,36 @@
+require 'open-uri'
+require 'json'
+
+# Downloads comments from the comments API and extracts their texts.
+class JSONParser
+  attr_reader :comments_url
+
+  def initialize(comments_url)
+    @comments_url = comments_url
+  end
+
+  # Returns the array of comment texts; the API is queried once and the
+  # result is cached.
+  def comments
+    @comments ||= send_comments_request
+  end
+
+  private
+
+  # Uses URI#open (open-uri) rather than Kernel#open, which would execute a
+  # command for a string starting with "|". The block form closes the stream.
+  def send_comments_request
+    json = URI.parse(@comments_url).open(&:read)
+    JSON.parse(json).fetch('comments', []).map { |comment| comment['text'] }
+  end
+
+  # Currently unused. Keeps only comments with at least one like.
+  # NOTE(review): assumes each comment looks like
+  # { 'marks' => { 'likes' => Integer } } - confirm against the API.
+  def filter_response(response)
+    response.select { |comment| comment.dig('marks', 'likes').to_i > 0 }
+  end
+end
+
+# content = JSONParser.new('https://comments.api.onliner.by/news/people.post/570149/comments?limit=5').comments
+# p content
diff --git a/2230/3/app/models/article.rb b/2230/3/app/models/article.rb
new file mode 100644
index 000000000..c7056fb2f
--- /dev/null
+++ b/2230/3/app/models/article.rb
@@ -0,0 +1,8 @@
+# An analysed article: source URL, page title, overall rating and the
+# collection of its rated comments.
+class Article < Ohm::Model
+  attribute :url
+  attribute :title
+  attribute :rating
+  collection :comments, :Comment
+end
diff --git a/2230/3/app/models/comment.rb b/2230/3/app/models/comment.rb
new file mode 100644
index 000000000..04e25a8e6
--- /dev/null
+++ b/2230/3/app/models/comment.rb
@@ -0,0 +1,6 @@
+# A single comment text with its rating, referencing its Article.
+class Comment < Ohm::Model
+  attribute :text
+  attribute :rating
+  reference :article, :Article
+end
diff --git a/2230/3/app/views/articles/new.htm b/2230/3/app/views/articles/new.htm
new file mode 100644
index 000000000..22d5c1af2
--- /dev/null
+++ b/2230/3/app/views/articles/new.htm
@@ -0,0 +1,14 @@
+
+
+ +Скопируйте URL любой статьи на onliner.by и вставьте в поле ниже
+ + + diff --git a/2230/3/app/views/articles/new.slim b/2230/3/app/views/articles/new.slim new file mode 100644 index 000000000..41d582671 --- /dev/null +++ b/2230/3/app/views/articles/new.slim @@ -0,0 +1,20 @@ +html + head + meta charset='utf-8' + title Добавление новой статьи + body + p Скопируйте URL любой статьи на onliner.by и вставьте в поле ниже + form action='../articles' method='post' + label for='article_new' Новый URL: + input type='text' name='article_new' + br + br + input type='submit' value='Отправить' + + ul + li + a href='javascript:history.back()' Назад + li + a href='../articles' К списку статей + li + a href='/' На главную diff --git a/2230/3/app/views/articles/show.htm b/2230/3/app/views/articles/show.htm new file mode 100644 index 000000000..0f10c9424 --- /dev/null +++ b/2230/3/app/views/articles/show.htm @@ -0,0 +1,35 @@ + + + +Выберите любую статью, чтобы увидеть, на основании каких комментариев сформирован рейтинг
+Название | +Ссылка на источник | +Рейтинг | +
---|---|---|
+ Зубр нашёл себе тёлочку + | ++ https://auto.onliner.by/2018/02/03/zubry + | +-53 | +
+ Снова выросли цены на нефть + | ++ https://auto.onliner.by/2018/05/25/belneftexim-6 + | +15 | +
Рейтинг сформирован от -100 до 100 при помощи сервисов Azure
+Название | +Рейтинг | +
---|---|
+ Первонах + |
+ -53 | +
+ А вот я то в советские-то времена... + |
+ 15 | +