From a81fade2ee307934453979b53f6514096d236294 Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 02:30:25 +0530 Subject: [PATCH 01/16] refactor publishers --- .../general/national/india/thewire.dart | 31 +++++++----------- lib/extractor/general/world/aljazeera.dart | 30 +++-------------- lib/extractor/general/world/bbc.dart | 23 ++++++------- lib/extractor/general/world/reuters.dart | 25 +++------------ lib/extractor/technology/theverge.dart | 32 ++++++------------- lib/extractor/technology/torrentfreak.dart | 26 ++++----------- lib/model/publisher.dart | 2 +- lib/pages/full_article.dart | 2 +- 8 files changed, 52 insertions(+), 119 deletions(-) diff --git a/lib/extractor/general/national/india/thewire.dart b/lib/extractor/general/national/india/thewire.dart index 936ecdc..482ac87 100644 --- a/lib/extractor/general/national/india/thewire.dart +++ b/lib/extractor/general/national/india/thewire.dart @@ -39,27 +39,16 @@ class TheWire extends Publisher { } @override - Future article(String url) async { - var response = await http - .get(Uri.parse('$homePage$url')); + Future article(NewsArticle newsArticle) async { + var response = await http.get(Uri.parse('$homePage${newsArticle.url}')); if (response.statusCode == 200) { var data = json.decode(response.body); var postDetail = data["post-detail"][0]; - var title = postDetail["post_title"]; - var article = postDetail["post_content"]; - var author = postDetail["post_author_name"][0]["author_name"]; - var excerpt = postDetail["post_excerpt"]; + var content = postDetail["post_content"]; var thumbnail = postDetail["featured_image"][0]; - var time = postDetail["post_date_gmt"]; - return NewsArticle( - this, - title ?? "", - article ?? "", - excerpt ?? "", - author ?? "", - url, - thumbnail ?? "", - parseDateString(time?.trim() ?? ""), + return newsArticle.fill( + content: content, + thumbnail: thumbnail, ); } return null; @@ -84,9 +73,10 @@ class TheWire extends Publisher { for (var element in data) { var title = element['post_title']; var author = element['post_author_name'][0]["author_name"]; - var thumbnail = element['hero_image'][0]; //element['thumbnail']['url']; + var thumbnail = element['hero_image'][0]; var time = element["post_date_gmt"]; - var articleUrl = '/wp-json/thewire/v2/posts/detail/${element['post_name']}'; + var articleUrl = + '/wp-json/thewire/v2/posts/detail/${element['post_name']}'; var excerpt = element['post_excerpt']; articles.add(NewsArticle( this, @@ -109,7 +99,8 @@ class TheWire extends Publisher { if (category == '/') { category = 'home'; } - String apiUrl = '$homePage/wp-json/thewire/v2/posts/$category/recent-stories?page=$page&per_page=10'; + String apiUrl = + '$homePage/wp-json/thewire/v2/posts/$category/recent-stories?page=$page&per_page=10'; return extract(apiUrl, false); } diff --git a/lib/extractor/general/world/aljazeera.dart b/lib/extractor/general/world/aljazeera.dart index bc7f5c6..93a61b9 100644 --- a/lib/extractor/general/world/aljazeera.dart +++ b/lib/extractor/general/world/aljazeera.dart @@ -1,4 +1,3 @@ -import 'package:intl/intl.dart'; import 'package:whapp/model/article.dart'; import 'package:whapp/model/publisher.dart'; import 'dart:convert'; @@ -30,38 +29,19 @@ class AlJazeera extends Publisher { } @override - Future article(String url) async { - var response = await http.get(Uri.parse('$homePage$url')); + Future article(NewsArticle newsArticle) async { + var response = await http.get(Uri.parse('$homePage${newsArticle.url}')); if (response.statusCode == 200) { var document = html_parser.parse(utf8.decode(response.bodyBytes)); var article = document.getElementById("main-content-area"); - - var titleElement = article?.querySelector('h1'); - var excerptElement = article?.querySelector('em'); var thumbnailElement = article?.querySelector('img'); var articleElement = article?.querySelector('.wysiwyg'); - var authorElement = article?.querySelector('.author-link'); - var timeElement = article?.querySelector('.date-simple span[aria-hidden]'); - var title = titleElement?.text; var content = articleElement?.text; - var author = authorElement?.text; - var excerpt = excerptElement?.text; var thumbnail = "$homePage${thumbnailElement?.attributes["src"]}"; - var time = timeElement?.text; - - if (time!=null) { - time = DateFormat('d MMM yyyy').parse(time).toString(); - } - return NewsArticle( - this, - title ?? "", - content ?? "", - excerpt ?? "", - author ?? "", - url, - thumbnail, - parseDateString(time?.trim() ?? ""), + return newsArticle.fill( + content: content, + thumbnail: thumbnail, ); } return null; diff --git a/lib/extractor/general/world/bbc.dart b/lib/extractor/general/world/bbc.dart index c5587e4..41a61c4 100644 --- a/lib/extractor/general/world/bbc.dart +++ b/lib/extractor/general/world/bbc.dart @@ -157,30 +157,31 @@ class BBC extends Publisher { } @override - Future article(String url) async { - var response = await http.get(Uri.parse("$homePage$url")); + Future article(NewsArticle newsArticle) async { + var response = await http.get(Uri.parse("$homePage${newsArticle.url}")); if (response.statusCode == 200) { var document = html_parser.parse(utf8.decode(response.bodyBytes)); - var titleElement = document.querySelector('article h1'); - var articleElement = document.querySelectorAll('article p'); - var excerptElement = document.querySelector('article div b'); - var timeElement = document.querySelector('article time'); - var thumbnailElement = document.querySelector('article img'); - var authorElement = document.querySelector("article div[class*=TextContributorName]"); + var article = document.querySelector('.article__main:nth-child(1)'); + var titleElement = article?.querySelector('h1'); + var excerptElement = article?.querySelector('div b'); + var timeElement = article?.querySelector('time'); + var thumbnailElement = article?.querySelector('img'); + var authorElement = article?.querySelector("div[class*=TextContributorName]"); var title = titleElement?.text; - var article = articleElement.sublist(1).map((e) => "

${e.text}

").join(); + var content = article?.innerHtml; var author = authorElement?.text.replaceFirst("By ", ""); var excerpt = excerptElement?.text; var thumbnail = thumbnailElement?.attributes["src"]; var time = timeElement?.attributes["datetime"]; + return NewsArticle( this, title ?? "", - article, + content ?? "", excerpt ?? "", author ?? "", - url, + newsArticle.url, thumbnail ?? "", parseDateString(time?.trim() ?? ""), ); diff --git a/lib/extractor/general/world/reuters.dart b/lib/extractor/general/world/reuters.dart index 4dd3aa8..d8eae05 100644 --- a/lib/extractor/general/world/reuters.dart +++ b/lib/extractor/general/world/reuters.dart @@ -33,29 +33,14 @@ class Reuters extends Publisher { } @override - Future article(String url) async { - var response = await http.get(Uri.parse('https://neuters.de$url')); + Future article(NewsArticle newsArticle) async { + var response = await http.get(Uri.parse('https://neuters.de${newsArticle.url}')); if (response.statusCode == 200) { var document = html_parser.parse(utf8.decode(response.bodyBytes)); - - var titleElement = document.querySelector('h1'); var articleElement = document.querySelectorAll('p:not(.byline)'); - var timeAuthorElement = document.querySelector('.byline'); - var title = titleElement?.text; - var article = articleElement.map((e) => e.text).join("\n"); - var author = timeAuthorElement?.text.split(" - ")[1]; - var excerpt = ""; - var thumbnail = ""; - var time = timeAuthorElement?.text.split(" - ")[0]; - return NewsArticle( - this, - title ?? "", - article, - excerpt, - author ?? "", - url, - thumbnail, - parseDateString(time?.trim() ?? ""), + var content = articleElement.map((e) => e.text).join("\n"); + return newsArticle.fill( + content: content, ); } return null; diff --git a/lib/extractor/technology/theverge.dart b/lib/extractor/technology/theverge.dart index 5d3cad4..68703d7 100644 --- a/lib/extractor/technology/theverge.dart +++ b/lib/extractor/technology/theverge.dart @@ -36,32 +36,24 @@ class TheVerge extends Publisher { } @override - Future article(String url) async { - var response = await http.get(Uri.parse('$homePage$url')); + Future article(NewsArticle newsArticle) async { + var response = await http.get(Uri.parse('$homePage${newsArticle.url}')); if (response.statusCode == 200) { var document = html_parser.parse(utf8.decode(response.bodyBytes)); - var titleElement = document.querySelector('h1') ?? document.querySelector("div.inline"); var articleElement = document.querySelector('.duet--article--article-body-component-container') ?? document.querySelector(".flex-1"); var authorElement = document.querySelector('span.font-medium > a') ?? document.querySelector("a[href*=authors]"); - var excerptElement = document.querySelector('span h2.inline'); var thumbnailElement = document.querySelector('.duet--article--lede-image img'); var timeElement = document.querySelector('time'); - var title = titleElement?.text; - var article = articleElement?.innerHtml; + var content = articleElement?.innerHtml; var author = authorElement?.text; - var excerpt = excerptElement?.text; var thumbnail = thumbnailElement!=null?thumbnailElement.attributes["src"]:""; var time = timeElement?.attributes["datetime"]; - return NewsArticle( - this, - title ?? "", - article ?? "", - excerpt ?? "", - author ?? "", - url, - thumbnail ?? "", - parseDateString(time?.trim() ?? ""), + return newsArticle.fill( + content: content, + author: author, + thumbnail: thumbnail, + publishedAt: parseDateString(time?.trim() ?? ""), ); } return null; @@ -111,11 +103,7 @@ class TheVerge extends Publisher { Future> extractSearchArticles(String searchQuery, int page) async { Set articles = {}; var response = await http.get( - Uri.parse("$homePage/api/search"), - headers: { - "q": searchQuery, - "page": (page-1).toString(), - } + Uri.parse('$homePage/api/search?q=$searchQuery&page=$page') ); if (response.statusCode == 200) { @@ -126,7 +114,7 @@ class TheVerge extends Publisher { var title = element["title"]; var url = element["link"]; var excerpt = element["htmlSnippet"]; - var thumbnail = element["pagemap"]["cse_image"][0]; + var thumbnail = element["pagemap"]["cse_image"][0]["src"]; var time = element["snippet"].split("...")[0]; articles.add(NewsArticle( this, diff --git a/lib/extractor/technology/torrentfreak.dart b/lib/extractor/technology/torrentfreak.dart index 2199767..038f32d 100644 --- a/lib/extractor/technology/torrentfreak.dart +++ b/lib/extractor/technology/torrentfreak.dart @@ -37,33 +37,21 @@ class TorrentFreak extends Publisher { } @override - Future article(String url) async { - var response = await http.get(Uri.parse('$homePage$url')); + Future article(NewsArticle newsArticle) async { + var response = await http.get(Uri.parse('$homePage${newsArticle.url}')); if (response.statusCode == 200) { var document = html_parser.parse(utf8.decode(response.bodyBytes)); - var titleElement = document.querySelector('.hero__title'); var articleElement = document.querySelector('.article__body'); - var authorElement = document.querySelector('.hero__published a'); var excerptElement = document.querySelector('.article__excerpt'); var thumbnailElement = document.querySelector('section[data-bg]'); - var timeElement = document.querySelector('time'); - var title = titleElement?.text; - var article = articleElement?.innerHtml; - var author = authorElement?.text; + var content = articleElement?.innerHtml; var excerpt = excerptElement?.text; var thumbnail = thumbnailElement?.attributes["data-bg"]; - var time = timeElement?.text; - - return NewsArticle( - this, - title ?? "", - article ?? "", - excerpt ?? "", - author ?? "", - url, - thumbnail ?? "", - parseDateString(time?.trim() ?? ""), + return newsArticle.fill( + content: content, + excerpt: excerpt, + thumbnail: thumbnail, ); } return null; diff --git a/lib/model/publisher.dart b/lib/model/publisher.dart index d58f72c..2d9fb29 100644 --- a/lib/model/publisher.dart +++ b/lib/model/publisher.dart @@ -40,7 +40,7 @@ abstract class Publisher { Future> searchedArticles({required String searchQuery, int page = 1}); - Future article(String url); + Future article(NewsArticle newsArticle); Map toJson() { return { diff --git a/lib/pages/full_article.dart b/lib/pages/full_article.dart index cbe8be6..00c1f17 100644 --- a/lib/pages/full_article.dart +++ b/lib/pages/full_article.dart @@ -37,7 +37,7 @@ class _ArticlePageState extends State { return FutureBuilder( initialData: widget.article, - future: widget.article.publisher.article(widget.article.url), + future: widget.article.publisher.article(widget.article), builder: (context, snapshot) { String fullUrl = "${widget.article.publisher.homePage}${snapshot.data!.url}"; From 5457d8c725cd8560689b8c4094053718dc243718 Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 02:31:26 +0530 Subject: [PATCH 02/16] add source: bleepingcomputer --- .../technology/bleepingcomputer.dart | 77 +++++++++++++++++++ .../technology/bleepingcomputer.dart | 21 +++++ 2 files changed, 98 insertions(+) create mode 100644 lib/extractor/technology/bleepingcomputer.dart create mode 100644 test/extractor/technology/bleepingcomputer.dart diff --git a/lib/extractor/technology/bleepingcomputer.dart b/lib/extractor/technology/bleepingcomputer.dart new file mode 100644 index 0000000..501cb94 --- /dev/null +++ b/lib/extractor/technology/bleepingcomputer.dart @@ -0,0 +1,77 @@ +import 'package:html/dom.dart'; +import 'package:whapp/model/article.dart'; +import 'package:whapp/model/publisher.dart'; +import 'dart:convert'; +import 'package:http/http.dart' as http; +import 'package:html/parser.dart' as html_parser; +import 'package:whapp/utils/time.dart'; + +class BleepingComputer extends Publisher { + @override + String get homePage => "https://www.bleepingcomputer.com"; + + @override + String get name => "BleepingComputer"; + + @override + Future article(NewsArticle newsArticle) async { + var response = await http.get(Uri.parse("$homePage${newsArticle.url}")); + if (response.statusCode == 200) { + Document document = html_parser.parse(utf8.decode(response.bodyBytes)); + Element? articleElement = document.querySelector(".article_section"); + String? thumbnail = articleElement + ?.querySelector("p img") + ?.attributes["src"]; + String? content = articleElement?.innerHtml; + return newsArticle.fill(content: content, thumbnail: thumbnail,); + } + return null; + } + + @override + bool get hasSearchSupport => false; + + @override + Future> get categories async => {}; + + @override + Future> categoryArticles({String category = "", int page = 1}) async { + Set articles = {}; + var response = await http.get(Uri.parse("$homePage/news/page/$page")); + if (response.statusCode == 200) { + Document document = html_parser.parse(utf8.decode(response.bodyBytes)); + List articleElements = + document.querySelectorAll("#bc-home-news-main-wrap > li"); + for (Element articleElement in articleElements) { + String? title = articleElement.querySelector("h4")?.text; + String? excerpt = articleElement.querySelector("p")?.text; + String? author = articleElement.querySelector(".author")?.text; + String? url = articleElement.querySelector("h4 a")?.attributes["href"]; + String? thumbnail = articleElement + .querySelector(".bc_latest_news_img img") + ?.attributes["src"]; + String? content = ""; + String? date = articleElement.querySelector(".bc_news_date")?.text; + String? time = articleElement.querySelector(".bc_news_time")?.text; + String parsedTime = convertToIso8601("$date $time", "MMMM dd, yyyy hh:mm a"); + + NewsArticle( + this, + title ?? "", + content, + excerpt ?? "", + author ?? "", + url ?? "", + thumbnail ?? "", + parseDateString(parsedTime), + ); + } + } + return articles; + } + + @override + Future> searchedArticles({required String searchQuery, int page = 1}) async{ + return {}; + } +} diff --git a/test/extractor/technology/bleepingcomputer.dart b/test/extractor/technology/bleepingcomputer.dart new file mode 100644 index 0000000..d990038 --- /dev/null +++ b/test/extractor/technology/bleepingcomputer.dart @@ -0,0 +1,21 @@ +import 'package:flutter_test/flutter_test.dart'; +import 'package:whapp/extractor/technology/bleepingcomputer.dart'; +import 'package:whapp/model/publisher.dart'; + +import '../common.dart'; + +void main() { + Publisher publisher = BleepingComputer(); + + test('Extract Categories Test', () async { + await ExtractorTest.categoriesTest(publisher); + }); + + test('Category Articles Test', () async { + await ExtractorTest.categoryArticlesTest(publisher); + }); + + test('Search Articles Test', () async { + await ExtractorTest.searchedArticlesTest(publisher, 'tech'); + }); +} From a6eb93a2c668d911b09b42d9c4ae7c10b9b522ad Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 02:31:44 +0530 Subject: [PATCH 03/16] add source: engadget --- lib/extractor/technology/engadget.dart | 111 ++++++++++++++++++++++++ test/extractor/technology/engadget.dart | 21 +++++ 2 files changed, 132 insertions(+) create mode 100644 lib/extractor/technology/engadget.dart create mode 100644 test/extractor/technology/engadget.dart diff --git a/lib/extractor/technology/engadget.dart b/lib/extractor/technology/engadget.dart new file mode 100644 index 0000000..d836381 --- /dev/null +++ b/lib/extractor/technology/engadget.dart @@ -0,0 +1,111 @@ +import 'package:html/dom.dart'; +import 'package:whapp/model/article.dart'; +import 'package:whapp/model/publisher.dart'; +import 'dart:convert'; +import 'package:http/http.dart' as http; +import 'package:html/parser.dart' as html_parser; +import 'package:whapp/utils/time.dart'; + +class Engadget extends Publisher { + @override + String get homePage => "https://www.engadget.com"; + + @override + String get name => "Engadget"; + + @override + Future article(NewsArticle newsArticle) async { + var response = await http.get(Uri.parse("$homePage${newsArticle.url}")); + if (response.statusCode == 200) { + Document document = html_parser.parse(utf8.decode(response.bodyBytes)); + Element? articleElement = document.querySelector(".article_section"); + String? thumbnail = articleElement + ?.querySelector("p img") + ?.attributes["src"]; + String? content = articleElement?.innerHtml; + return newsArticle.fill(content: content, thumbnail: thumbnail); + } + return null; + } + + @override + Future> get categories async => { + "News": "news", + "Reviews": "reviews", + "Guides": "guides", + "Gaming": "gaming", + "Gear": "gear", + "Entertainment": "entertainment", + "Tomorrow": "tomorrow", + "Deals": "deals", + }; + + @override + Future> categoryArticles({String category = "news", int page = 1}) async { + Set articles = {}; + var response = await http.get(Uri.parse("$homePage/$category/page/$page")); + if (response.statusCode == 200) { + Document document = html_parser.parse(utf8.decode(response.bodyBytes)); + List articleElements = + document.querySelectorAll("ul[data-component=LatestStream] li[class]"); + for (Element articleElement in articleElements) { + String? title = articleElement.querySelector("h2 a")?.text; + String? excerpt = articleElement.querySelector("h2+div")?.text; + String? authorTime = articleElement.querySelector("div[class*=engadgetFontDarkGray]")?.text; + String? author = authorTime?.split(",")[0].replaceFirst("By ", ""); + String? date = authorTime?.split(",")[1]; + String? url = articleElement.querySelector("h2 a")?.attributes["href"]; + String? thumbnail = articleElement + .querySelector("img[width]") + ?.attributes["src"]; + String parsedTime = convertToIso8601("$date", "MM.dd.yyyy"); + + NewsArticle( + this, + title ?? "", + "", + excerpt ?? "", + author ?? "", + url ?? "", + thumbnail ?? "", + parseDateString(parsedTime), + ); + } + } + return articles; + } + + @override + Future> searchedArticles({required String searchQuery, int page = 1}) async{ + Set articles = {}; + var response = await http.get(Uri.parse("https://search.engadget.com/search;?p=$searchQuery&pz=10&fr=engadget&fr2=sb-top&bct=0&b=${(page*10)+1}&pz=10&bct=0&xargs=0")); + if (response.statusCode == 200) { + Document document = html_parser.parse(utf8.decode(response.bodyBytes)); + List articleElements = + document.querySelectorAll(".compArticleList li"); + for (Element articleElement in articleElements) { + String? title = articleElement.querySelector("h4 a")?.text; + String? excerpt = articleElement.querySelector("h4+p")?.text; + String? author = articleElement.querySelector(".csub span[class*=pr]")?.text; + String? date = articleElement.querySelector(".csub span[class*=pl]")?.text; + String? url = articleElement.querySelector("h4 a")?.attributes["href"]; + String? thumbnail = articleElement + .querySelector(".thmb") + ?.attributes["src"]; + String parsedTime = convertToIso8601("$date", "MM.dd.yyyy"); + + NewsArticle( + this, + title ?? "", + "", + excerpt ?? "", + author ?? "", + url ?? "", + thumbnail ?? "", + parseDateString(parsedTime), + ); + } + } + return articles; + } +} diff --git a/test/extractor/technology/engadget.dart b/test/extractor/technology/engadget.dart new file mode 100644 index 0000000..1d22bcc --- /dev/null +++ b/test/extractor/technology/engadget.dart @@ -0,0 +1,21 @@ +import 'package:flutter_test/flutter_test.dart'; +import 'package:whapp/extractor/technology/engadget.dart'; +import 'package:whapp/model/publisher.dart'; + +import '../common.dart'; + +void main() { + Publisher publisher = Engadget(); + + test('Extract Categories Test', () async { + await ExtractorTest.categoriesTest(publisher); + }); + + test('Category Articles Test', () async { + await ExtractorTest.categoryArticlesTest(publisher); + }); + + test('Search Articles Test', () async { + await ExtractorTest.searchedArticlesTest(publisher, 'tech'); + }); +} From 889f3c13985e34b43dd749932c8807d47f9d497e Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 02:31:50 +0530 Subject: [PATCH 04/16] add source: nitter --- lib/extractor/general/world/nitter.dart | 111 +++++++++++++++++++++++ test/extractor/general/world/nitter.dart | 30 ++++++ 2 files changed, 141 insertions(+) create mode 100644 lib/extractor/general/world/nitter.dart create mode 100644 test/extractor/general/world/nitter.dart diff --git a/lib/extractor/general/world/nitter.dart b/lib/extractor/general/world/nitter.dart new file mode 100644 index 0000000..dc654c6 --- /dev/null +++ b/lib/extractor/general/world/nitter.dart @@ -0,0 +1,111 @@ +import 'package:html/dom.dart'; +import 'package:intl/intl.dart'; +import 'package:whapp/model/article.dart'; +import 'package:whapp/model/publisher.dart'; +import 'dart:convert'; +import 'package:http/http.dart' as http; +import 'package:html/parser.dart' as html_parser; +import 'package:whapp/utils/time.dart'; + +class Nitter extends Publisher { + Map nextCursor = {}; + + @override + String get homePage => "https://nitter.net"; + + @override + String get name => "Nitter"; + + @override + Future> get categories async => {}; + + Future> extract(String category, int page, + {String query = ""}) async { + Set articles = {}; + var dates = generateWeekDates(page); + var response = await http.get( + Uri.parse( + "$homePage/$category/search?f=tweets&q=$query&since=${dates[0]}&until=${dates[1]}"), + headers: { + 'Host': 'nitter.net', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:121.0) Gecko/20100101 Firefox/121.0', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.5', + }, + ); + if (response.statusCode == 200) { + Document document = html_parser.parse(utf8.decode(response.bodyBytes)); + List articleElements = document.querySelectorAll(".tweet-body"); + for (Element articleElement in articleElements) { + String? title = articleElement + .querySelector(".tweet-content") + ?.text + .split("\n") + .first; + String? excerpt = articleElement.querySelector(".tweet-content")?.text; + String? author = articleElement.querySelector(".username")?.text; + String? url = + articleElement.querySelector(".tweet-link")?.attributes["href"]; + String? thumbnail = ""; + String? content = ""; + String? date = + articleElement.querySelector(".tweet-date a")?.attributes["title"]; + String parsedTime = + convertToIso8601("$date", "MMM d, yyyy ยท h:mm a UTC"); + + articles.add(NewsArticle( + this, + title ?? "", + content, + excerpt ?? "", + author ?? "", + "$homePage$url", + thumbnail, + parseDateString(parsedTime), + )); + } + return articles; + } + return articles; + } + + @override + Future> categoryArticles( + {String category = "", int page = 1}) async { + if (category.isEmpty || category == "/") return {}; + return extract(category, page); + } + + @override + Future> searchedArticles( + {required String searchQuery, int page = 1}) async { + if (!searchQuery.contains("#")) return {}; + return extract(searchQuery.split("#")[0], page, + query: searchQuery.split("#")[1]); + } + + @override + Future article(NewsArticle newsArticle) async { + var response = await http.get(Uri.parse(newsArticle.url)); + if (response.statusCode == 200) { + Document document = html_parser.parse(utf8.decode(response.bodyBytes)); + return newsArticle.fill( + content: document.querySelector(".tweet-body")?.text ?? ""); + } + return null; + } + + List generateWeekDates(int page) { + DateTime currentDate = DateTime.now(); + DateTime untilDate; + if (page == 1) { + untilDate = currentDate; + } else { + untilDate = currentDate.subtract(Duration(days: 7 * (page - 1))); + } + DateTime sinceDate = untilDate.subtract(Duration(days: 7)); + String sinceDateString = DateFormat('yyyy-MM-dd').format(sinceDate); + String untilDateString = DateFormat('yyyy-MM-dd').format(untilDate); + return [sinceDateString, untilDateString]; + } +} diff --git a/test/extractor/general/world/nitter.dart b/test/extractor/general/world/nitter.dart new file mode 100644 index 0000000..d553ca3 --- /dev/null +++ b/test/extractor/general/world/nitter.dart @@ -0,0 +1,30 @@ +import 'package:flutter_test/flutter_test.dart'; +import 'package:whapp/extractor/general/world/nitter.dart'; +import 'package:whapp/extractor/technology/engadget.dart'; +import 'package:whapp/model/article.dart'; +import 'package:whapp/model/publisher.dart'; + +import '../../common.dart'; + +void main() { + Publisher publisher = Nitter(); + + test('Extract Categories Test', () async { + await ExtractorTest.categoriesTest(publisher); + }); + + test('Category Articles Test', () async { + final categoryArticles = await publisher.categoryArticles(category: "Steam", page: 1); + + expect(categoryArticles, isNotEmpty); + + var article = categoryArticles.first; + expect(article, isA()); + expect(article?.title, isNotEmpty); + expect(article?.publishedAt.value, isNot(0)); + }); + + test('Search Articles Test', () async { + await ExtractorTest.searchedArticlesTest(publisher, 'Steam#sale'); + }); +} From 4ac0c40d4f847ff4ba20f6683b83268d0485b9c1 Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 02:32:04 +0530 Subject: [PATCH 05/16] add source: arstechnica --- lib/extractor/technology/arstechnica.dart | 100 +++++++++++++++++++++ test/extractor/technology/arstechnica.dart | 21 +++++ 2 files changed, 121 insertions(+) create mode 100644 lib/extractor/technology/arstechnica.dart create mode 100644 test/extractor/technology/arstechnica.dart diff --git a/lib/extractor/technology/arstechnica.dart b/lib/extractor/technology/arstechnica.dart new file mode 100644 index 0000000..b842b93 --- /dev/null +++ b/lib/extractor/technology/arstechnica.dart @@ -0,0 +1,100 @@ +import 'package:html/dom.dart'; +import 'package:whapp/model/article.dart'; +import 'package:whapp/model/publisher.dart'; +import 'dart:convert'; +import 'package:http/http.dart' as http; +import 'package:html/parser.dart' as html_parser; +import 'package:whapp/utils/time.dart'; + +class ArsTechnica extends Publisher { + @override + String get homePage => "https://arstechnica.com"; + + @override + String get name => "Ars Technica"; + + @override + Future article(NewsArticle newsArticle) async { + var response = await http.get(Uri.parse("$homePage${newsArticle.url}")); + if (response.statusCode == 200) { + Document document = html_parser.parse(utf8.decode(response.bodyBytes)); + Element? articleElement = document.querySelector("article"); + String? thumbnail = articleElement + ?.querySelector("p img") + ?.attributes["src"]; + String? content = articleElement?.innerHtml; + return newsArticle.fill(content: content, thumbnail: thumbnail); + } + return null; + } + + @override + Future> get categories async => { + "News": "news", + "Reviews": "reviews", + "Guides": "guides", + "Gaming": "gaming", + "Gear": "gear", + "Entertainment": "entertainment", + "Tomorrow": "tomorrow", + "Deals": "deals", + }; + + @override + bool get hasSearchSupport => false; + + @override + Future> categoryArticles({String category = "", int page = 1}) async { + Set articles = {}; + if(category.isNotEmpty) { + category="/$category"; + } + + var response = await http.get(Uri.parse("$homePage$category/page/$page")); + if (response.statusCode == 200) { + Document document = html_parser.parse(utf8.decode(response.bodyBytes)); + List articleElements = + document.querySelectorAll(".article"); + for (Element articleElement in articleElements) { + String? title = articleElement.querySelector("h2 a")?.text; + String? excerpt = articleElement.querySelector(".excerpt")?.text; + String? author = articleElement.querySelector("span[itemprop=name]")?.text; + String? date = articleElement.querySelector("time")?.attributes["datetime"] ?? ""; + String? url = articleElement.querySelector("h2 a")?.attributes["href"]; + String? thumbnail = articleElement + .querySelector("figure div") + ?.attributes["style"]; + + NewsArticle( + this, + title ?? "", + "", + excerpt ?? "", + author ?? "", + url ?? "", + extractUrl(thumbnail), + parseDateString(date), + ); + } + } + return articles; + } + + String extractUrl(String? inputString) { + RegExp regExp = RegExp(r"url\('([^']*)'\)"); + if(inputString!=null) { + Match? match = regExp.firstMatch(inputString); + if (match != null) { + return match.group(1)!; + } else { + return ""; + } + } + return ""; + } + + @override + Future> searchedArticles({required String searchQuery, int page = 1}) async{ + return {}; + } +} diff --git a/test/extractor/technology/arstechnica.dart b/test/extractor/technology/arstechnica.dart new file mode 100644 index 0000000..7bff50c --- /dev/null +++ b/test/extractor/technology/arstechnica.dart @@ -0,0 +1,21 @@ +import 'package:flutter_test/flutter_test.dart'; +import 'package:whapp/extractor/technology/arstechnica.dart'; +import 'package:whapp/model/publisher.dart'; + +import '../common.dart'; + +void main() { + Publisher publisher = ArsTechnica(); + + test('Extract Categories Test', () async { + await ExtractorTest.categoriesTest(publisher); + }); + + test('Category Articles Test', () async { + await ExtractorTest.categoryArticlesTest(publisher); + }); + + test('Search Articles Test', () async { + await ExtractorTest.searchedArticlesTest(publisher, 'tech'); + }); +} From 7fae7d657fabb7d26485ec37ade85fb7a6c7f96e Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 02:33:09 +0530 Subject: [PATCH 06/16] refactor test --- .../general/national/india/thewire.dart | 48 +++++------------ test/extractor/general/world/aljazeera.dart | 44 ++++------------ test/extractor/general/world/bbc.dart | 52 ++++--------------- test/extractor/general/world/reuters.dart | 45 ++++------------ test/extractor/technology/theverge.dart | 45 ++++------------ test/extractor/technology/torrentfreak.dart | 49 ++++------------- 6 files changed, 63 insertions(+), 220 deletions(-) diff --git a/test/extractor/general/national/india/thewire.dart b/test/extractor/general/national/india/thewire.dart index 87d26e3..8d431d2 100644 --- a/test/extractor/general/national/india/thewire.dart +++ b/test/extractor/general/national/india/thewire.dart @@ -1,46 +1,22 @@ import 'package:flutter_test/flutter_test.dart'; import 'package:whapp/extractor/general/national/india/thewire.dart'; -import 'package:whapp/model/article.dart'; +import 'package:whapp/model/publisher.dart'; -void main() { - test('The Wire - Extract Categories Test', () async { - final theWire = TheWire(); - - final categories = await theWire.categories; - - expect(categories, isA>()); - expect(categories.isNotEmpty, true); - }); - - test('The Wire - Article Test', () async { - final theWire = TheWire(); - - const articleUrl = '/dont-marry-a-brit-unless-theyre-really-rich'; - final article = await theWire.article(articleUrl); +import '../../../common.dart'; - expect(article, isA()); - expect(article?.title, isNotEmpty); - expect(article?.content, isNotEmpty); - expect(article?.publishedAt.value, isNot(0)); +void main() { + Publisher publisher = TheWire(); + + test('Extract Categories Test', () async { + await ExtractorTest.categoriesTest(publisher); }); - test('The Wire - Category Articles Test', () async { - final theWire = TheWire(); - - final categoryArticles = - await theWire.categoryArticles(category: 'category/politics', page: 1); - - expect(categoryArticles, isA>()); - expect(categoryArticles, isNotEmpty); + test('Category Articles Test', () async { + await ExtractorTest.categoryArticlesTest(publisher); }); - test('The Wire - Search Articles Test', () async { - final theWire = TheWire(); - - final searchArticles = - await theWire.searchedArticles(searchQuery: 'delhi', page: 1); - - expect(searchArticles, isA>()); - expect(searchArticles, isNotEmpty); + test('Search Articles Test', () async { + await ExtractorTest.searchedArticlesTest(publisher, 'world'); }); } + diff --git a/test/extractor/general/world/aljazeera.dart b/test/extractor/general/world/aljazeera.dart index f2353e6..dd62be4 100644 --- a/test/extractor/general/world/aljazeera.dart +++ b/test/extractor/general/world/aljazeera.dart @@ -1,45 +1,21 @@ import 'package:flutter_test/flutter_test.dart'; import 'package:whapp/extractor/general/world/aljazeera.dart'; -import 'package:whapp/model/article.dart'; +import 'package:whapp/model/publisher.dart'; -void main() { - - late AlJazeera alJazeera; - setUp(() { - alJazeera = AlJazeera(); - }); +import '../../common.dart'; - test('Al Jazeera - Categories Test', () async { - final categories = await alJazeera.categories; - - expect(categories, isA>()); - expect(categories.isNotEmpty, true); - }); - - test('Al Jazeera - Article Test', () async { - final articleUrl = - '/news/2023/12/25/ukraine-russia-say-six-civilians-killed-in-attacks-on-kherson-horlivka'; - final article = await alJazeera.article(articleUrl); +void main() { + Publisher publisher = AlJazeera(); - expect(article, isA()); - expect(article?.title, isNotEmpty); - expect(article?.content, isNotEmpty); - expect(article?.publishedAt.value, isNot(0)); + test('Extract Categories Test', () async { + await ExtractorTest.categoriesTest(publisher); }); - test('Al Jazeera - Category Articles Test', () async { - final categoryArticles = - await alJazeera.categoryArticles(category: 'features', page: 1); - - expect(categoryArticles, isA>()); - expect(categoryArticles, isNotEmpty); + test('Category Articles Test', () async { + await ExtractorTest.categoryArticlesTest(publisher); }); - test('Al Jazeera - Searched Articles Test', () async { - final searchedArticles = - await alJazeera.searchedArticles(searchQuery: 'ukraine', page: 1); - - expect(searchedArticles, isA>()); - expect(searchedArticles, isNotEmpty); + test('Search Articles Test', () async { + await ExtractorTest.searchedArticlesTest(publisher, 'world'); }); } diff --git a/test/extractor/general/world/bbc.dart b/test/extractor/general/world/bbc.dart index cf8e86f..61ebf75 100644 --- a/test/extractor/general/world/bbc.dart +++ b/test/extractor/general/world/bbc.dart @@ -1,51 +1,21 @@ import 'package:flutter_test/flutter_test.dart'; import 'package:whapp/extractor/general/world/bbc.dart'; -import 'package:whapp/model/article.dart'; +import 'package:whapp/model/publisher.dart'; -void main() { - test('BBC - Extract Categories Test', () async { - final bbc = BBC(); +import '../../common.dart'; - final categories = await bbc.categories; +void main() { + Publisher publisher = BBC(); - expect(categories, isA>()); - expect(categories.isNotEmpty, true); + test('Extract Categories Test', () async { + await ExtractorTest.categoriesTest(publisher); }); - test('BBC - Article Test', () async { - final bbc = BBC(); - - final articleUrl = '/news/world-asia-67825665'; - final article = await bbc.article(articleUrl); - - expect(article, isA()); - expect(article?.title, isNotEmpty); - expect(article?.content, isNotEmpty); - expect(article?.publishedAt.value, isNot(0)); + test('Category Articles Test', () async { + await ExtractorTest.categoryArticlesTest(publisher); }); - test('BBC - Category Articles Test', () async { - final bbc = BBC(); - - var categoryArticles = - await bbc.categoryArticles(category: 'world', page: 1); - - expect(categoryArticles, isA>()); - expect(categoryArticles, isNotEmpty); - - categoryArticles = - await bbc.categoryArticles(category: 'technology', page: 1); - expect(categoryArticles, isA>()); - expect(categoryArticles, isNotEmpty); - }); - - test('BBC - Searched Articles Test', () async { - final bbc = BBC(); - - final searchedArticles = - await bbc.searchedArticles(searchQuery: 'climate', page: 1); - - expect(searchedArticles, isA>()); - expect(searchedArticles, isNotEmpty); + test('Search Articles Test', () async { + await ExtractorTest.searchedArticlesTest(publisher, 'world'); }); -} \ No newline at end of file +} diff --git a/test/extractor/general/world/reuters.dart b/test/extractor/general/world/reuters.dart index a51a516..85471a7 100644 --- a/test/extractor/general/world/reuters.dart +++ b/test/extractor/general/world/reuters.dart @@ -1,46 +1,21 @@ import 'package:flutter_test/flutter_test.dart'; import 'package:whapp/extractor/general/world/reuters.dart'; -import 'package:whapp/model/article.dart'; +import 'package:whapp/model/publisher.dart'; -void main() { - test('Reuters - Extract Categories Test', () async { - final reuters = Reuters(); - - final categories = await reuters.categories; - - expect(categories, isA>()); - expect(categories.isNotEmpty, true); - }); - - test('Reuters - Article Test', () async { - final reuters = Reuters(); +import '../../common.dart'; - final articleUrl = '/world/us/senators-move-require-release-us-government-ufo-records-2023-07-14/'; - final article = await reuters.article(articleUrl); +void main() { + Publisher publisher = Reuters(); - expect(article, isA()); - expect(article?.title, isNotEmpty); - expect(article?.content, isNotEmpty); - expect(article?.publishedAt.value, isNot(0)); + test('Extract Categories Test', () async { + await ExtractorTest.categoriesTest(publisher); }); - test('Reuters - Category Articles Test', () async { - final reuters = Reuters(); - - final categoryArticles = - await reuters.categoryArticles(category: 'business', page: 1); - - expect(categoryArticles, isA>()); - expect(categoryArticles, isNotEmpty); + test('Category Articles Test', () async { + await ExtractorTest.categoryArticlesTest(publisher); }); - test('Reuters - Searched Articles Test', () async { - final reuters = Reuters(); - - final searchedArticles = - await reuters.searchedArticles(searchQuery: 'ufo', page: 1); - - expect(searchedArticles, isA>()); - expect(searchedArticles, isNotEmpty); + test('Search Articles Test', () async { + await ExtractorTest.searchedArticlesTest(publisher, 'world'); }); } diff --git a/test/extractor/technology/theverge.dart b/test/extractor/technology/theverge.dart index 63fa8bb..ecccd7d 100644 --- a/test/extractor/technology/theverge.dart +++ b/test/extractor/technology/theverge.dart @@ -1,46 +1,21 @@ import 'package:flutter_test/flutter_test.dart'; import 'package:whapp/extractor/technology/theverge.dart'; -import 'package:whapp/model/article.dart'; +import 'package:whapp/model/publisher.dart'; -void main() { - test('The Verge - Extract Categories Test', () async { - final theVerge = TheVerge(); - - final categories = await theVerge.categories; - - expect(categories, isA>()); - expect(categories.isNotEmpty, true); - }); - - test('The Verge - Article Test', () async { - final theVerge = TheVerge(); +import '../common.dart'; - const articleUrl = '/2023/12/21/24011168/sony-playstation-discovery-shows-not-removed'; - final article = await theVerge.article(articleUrl); +void main() { + Publisher publisher = TheVerge(); - expect(article, isA()); - expect(article?.title, isNotEmpty); - expect(article?.content, isNotEmpty); - expect(article?.publishedAt.value, isNot(0)); + test('Extract Categories Test', () async { + await ExtractorTest.categoriesTest(publisher); }); - test('The Verge - Category Articles Test', () async { - final theVerge = TheVerge(); - - final categoryArticles = - await theVerge.categoryArticles(category: 'tech', page: 1); - - expect(categoryArticles, isA>()); - expect(categoryArticles, isNotEmpty); + test('Category Articles Test', () async { + await ExtractorTest.categoryArticlesTest(publisher); }); - test('The Verge - Search Articles Test', () async { - final theVerge = TheVerge(); - - final searchArticles = - await theVerge.searchedArticles(searchQuery: 'playstation', page: 1); - - expect(searchArticles, isA>()); - expect(searchArticles, isNotEmpty); + test('Search Articles Test', () async { + await ExtractorTest.searchedArticlesTest(publisher, 'tech'); }); } diff --git a/test/extractor/technology/torrentfreak.dart b/test/extractor/technology/torrentfreak.dart index 0f7048f..d743c3c 100644 --- a/test/extractor/technology/torrentfreak.dart +++ b/test/extractor/technology/torrentfreak.dart @@ -1,50 +1,21 @@ import 'package:flutter_test/flutter_test.dart'; import 'package:whapp/extractor/technology/torrentfreak.dart'; -import 'package:whapp/model/article.dart'; +import 'package:whapp/model/publisher.dart'; -void main() { - test('TorrentFreak - Extract Categories Test', () async { - final torrentFreak = TorrentFreak(); +import '../common.dart'; - final categories = await torrentFreak.categories; +void main() { + Publisher publisher = TorrentFreak(); - expect(categories, isA>()); - expect(categories.isNotEmpty, true); + test('Extract Categories Test', () async { + await ExtractorTest.categoriesTest(publisher); }); - test('TorrentFreak - Article Test', () async { - final torrentFreak = TorrentFreak(); - - const articleUrl = - '/spammers-use-epic-games-website-to-promote-piracy-scams-231210'; - final article = await torrentFreak.article(articleUrl); - - expect(article, isA()); - expect(article?.title, isNotEmpty); - expect(article?.content, isNotEmpty); - expect(article?.publishedAt.value, isNot(0)); + test('Category Articles Test', () async { + await ExtractorTest.categoryArticlesTest(publisher); }); - test('TorrentFreak - Category Articles Test', () async { - final torrentFreak = TorrentFreak(); - - final categoryArticles = - await torrentFreak.categoryArticles(category: '/', page: 1); - - expect(categoryArticles, isA>()); - expect(categoryArticles, isNotEmpty); - }); - - test('TorrentFreak - Searched Articles Test', () async { - final torrentFreak = TorrentFreak(); - - const searchQuery = 'piracy'; - const page = 1; - - final searchedArticles = await torrentFreak.searchedArticles( - searchQuery: searchQuery, page: page); - - expect(searchedArticles, isA>()); - expect(searchedArticles, isNotEmpty); + test('Search Articles Test', () async { + await ExtractorTest.searchedArticlesTest(publisher, 'tech'); }); } From 4312791c8d309026f889bfc88fae26d97558418b Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 02:33:34 +0530 Subject: [PATCH 07/16] add method: convertToIso8601 --- lib/utils/time.dart | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/lib/utils/time.dart b/lib/utils/time.dart index f6d6da5..17047ee 100644 --- a/lib/utils/time.dart +++ b/lib/utils/time.dart @@ -1,3 +1,5 @@ +import 'package:intl/intl.dart'; + MapEntry parseDateString(String timestamp) { try { DateTime dateTime = DateTime.parse(timestamp); @@ -14,10 +16,12 @@ MapEntry parseDateString(String timestamp) { return MapEntry(differenceInSeconds, '$differenceInSeconds seconds ago'); } else if (differenceInSeconds < hour) { int minutes = (differenceInSeconds / minute).floor(); - return MapEntry(differenceInSeconds, '$minutes ${(minutes == 1) ? 'minute' : 'minutes'} ago'); + return MapEntry(differenceInSeconds, + '$minutes ${(minutes == 1) ? 'minute' : 'minutes'} ago'); } else if (differenceInSeconds < day) { int hours = (differenceInSeconds / hour).floor(); - return MapEntry(differenceInSeconds, '$hours ${(hours == 1) ? 'hour' : 'hours'} ago'); + return MapEntry( + differenceInSeconds, '$hours ${(hours == 1) ? 'hour' : 'hours'} ago'); } else if (differenceInSeconds < month) { int days = (differenceInSeconds / day).floor(); if (days == 1) { @@ -27,9 +31,17 @@ MapEntry parseDateString(String timestamp) { } } else { int months = (differenceInSeconds / month).floor(); - return MapEntry(differenceInSeconds, '$months ${(months == 1) ? 'month' : 'months'} ago'); + return MapEntry(differenceInSeconds, + '$months ${(months == 1) ? 'month' : 'months'} ago'); } } catch (e) { - return MapEntry(0, timestamp); // Return 0 differenceInSeconds in case of an error. + return MapEntry( + 0, timestamp); // Return 0 differenceInSeconds in case of an error. } -} \ No newline at end of file +} + +String convertToIso8601(String inputTime, String inputFormatString) { + DateFormat inputFormat = DateFormat(inputFormatString); + DateTime parsedTime = inputFormat.parse(inputTime); + return parsedTime.toString(); +} From 93b1745998a58a326ae80ce70edf2a7de57f7cb9 Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 02:34:08 +0530 Subject: [PATCH 08/16] add new sources --- lib/model/publisher.dart | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/model/publisher.dart b/lib/model/publisher.dart index 2d9fb29..5611da9 100644 --- a/lib/model/publisher.dart +++ b/lib/model/publisher.dart @@ -1,17 +1,23 @@ import 'package:whapp/extractor/general/national/india/thewire.dart'; import 'package:whapp/extractor/general/world/aljazeera.dart'; import 'package:whapp/extractor/general/world/bbc.dart'; +import 'package:whapp/extractor/general/world/nitter.dart'; import 'package:whapp/extractor/general/world/reuters.dart'; +import 'package:whapp/extractor/technology/arstechnica.dart'; +import 'package:whapp/extractor/technology/bleepingcomputer.dart'; +import 'package:whapp/extractor/technology/engadget.dart'; import 'package:whapp/extractor/technology/theverge.dart'; import 'package:whapp/extractor/technology/torrentfreak.dart'; import 'package:whapp/model/article.dart'; import 'package:whapp/utils/string.dart'; - - Map publishers = { "Al Jazeera": AlJazeera(), + "Ars Technica": ArsTechnica(), "BBC": BBC(), + "BleepingComputer": BleepingComputer(), + "Engadget": Engadget(), + "Nitter": Nitter(), "Reuters": Reuters(), "The Verge": TheVerge(), "The Wire": TheWire(), @@ -29,7 +35,6 @@ abstract class Publisher { Future> get categories; - Future> articles({String category = "All", int page = 1}) { return category.startsWith("#") ? searchedArticles(searchQuery: getAsSearchQuery(category), page: page) From d7c26ec0b51bc856c922e9719648266656581cc8 Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 02:34:45 +0530 Subject: [PATCH 09/16] add method: fill --- lib/model/article.dart | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/lib/model/article.dart b/lib/model/article.dart index 2e16fd2..808257e 100644 --- a/lib/model/article.dart +++ b/lib/model/article.dart @@ -21,6 +21,27 @@ class NewsArticle { this.publishedAt, ); + NewsArticle fill({ + String? title, + String? content, + String? excerpt, + String? author, + String? url, + String? thumbnail, + MapEntry? publishedAt, + }) { + return NewsArticle( + publisher, + title ?? this.title, + content ?? this.content, + excerpt ?? this.excerpt, + author ?? this.author, + url ?? this.url, + thumbnail ?? this.thumbnail, + publishedAt ?? this.publishedAt, + ); + } + Map toJson() { return { 'publisher': publisher.toJson(), From 91f3f1cc67ab6a53ed9252e6f45acc78354aeba2 Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 02:34:59 +0530 Subject: [PATCH 10/16] add common test methods --- test/extractor/common.dart | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 test/extractor/common.dart diff --git a/test/extractor/common.dart b/test/extractor/common.dart new file mode 100644 index 0000000..9c4ba59 --- /dev/null +++ b/test/extractor/common.dart @@ -0,0 +1,36 @@ +import 'package:flutter_test/flutter_test.dart'; +import 'package:whapp/model/article.dart'; +import 'package:whapp/model/publisher.dart'; + +class ExtractorTest { + static Future categoriesTest(Publisher publisher) async { + final categories = await publisher.categories; + + expect(categories, isA>()); + expect(categories.isNotEmpty, true); + } + + static Future categoryArticlesTest(Publisher publisher) async { + final Map categories = await publisher.categories; + final categoryArticles = await publisher.categoryArticles(category: categories.entries.first.value, page: 1); + + expect(categoryArticles, isNotEmpty); + + var article = categoryArticles.first; + expect(article, isA()); + expect(article?.title, isNotEmpty); + expect(article?.publishedAt.value, isNot(0)); + } + + static Future searchedArticlesTest(Publisher publisher, String query) async { + final searchArticles = + await publisher.searchedArticles(searchQuery: 'world', page: 1); + + expect(searchArticles, isNotEmpty); + + var article = searchArticles.first; + expect(article, isA()); + expect(article?.title, isNotEmpty); + expect(article?.publishedAt.value, isNot(0)); + } +} \ No newline at end of file From dc35836bbf80f89bcd309bc73c77ab2e916f3565 Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 17:02:28 +0530 Subject: [PATCH 11/16] fix extractor issues --- lib/extractor/technology/arstechnica.dart | 14 ++++++-------- lib/extractor/technology/bleepingcomputer.dart | 14 ++++++-------- lib/extractor/technology/engadget.dart | 15 ++++++++------- test/extractor/technology/bleepingcomputer.dart | 10 ++++++++-- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/lib/extractor/technology/arstechnica.dart b/lib/extractor/technology/arstechnica.dart index b842b93..1888be7 100644 --- a/lib/extractor/technology/arstechnica.dart +++ b/lib/extractor/technology/arstechnica.dart @@ -15,13 +15,11 @@ class ArsTechnica extends Publisher { @override Future article(NewsArticle newsArticle) async { - var response = await http.get(Uri.parse("$homePage${newsArticle.url}")); + var response = await http.get(Uri.parse(newsArticle.url)); if (response.statusCode == 200) { Document document = html_parser.parse(utf8.decode(response.bodyBytes)); - Element? articleElement = document.querySelector("article"); - String? thumbnail = articleElement - ?.querySelector("p img") - ?.attributes["src"]; + Element? articleElement = document.querySelector(".article-content"); + String? thumbnail = ""; String? content = articleElement?.innerHtml; return newsArticle.fill(content: content, thumbnail: thumbnail); } @@ -30,7 +28,7 @@ class ArsTechnica extends Publisher { @override Future> get categories async => { - "News": "news", + "News": "", "Reviews": "reviews", "Guides": "guides", "Gaming": "gaming", @@ -65,7 +63,7 @@ class ArsTechnica extends Publisher { .querySelector("figure div") ?.attributes["style"]; - NewsArticle( + articles.add(NewsArticle( this, title ?? "", "", @@ -74,7 +72,7 @@ class ArsTechnica extends Publisher { url ?? "", extractUrl(thumbnail), parseDateString(date), - ); + )); } } return articles; diff --git a/lib/extractor/technology/bleepingcomputer.dart b/lib/extractor/technology/bleepingcomputer.dart index 501cb94..56406c9 100644 --- a/lib/extractor/technology/bleepingcomputer.dart +++ b/lib/extractor/technology/bleepingcomputer.dart @@ -15,13 +15,11 @@ class BleepingComputer extends Publisher { @override Future article(NewsArticle newsArticle) async { - var response = await http.get(Uri.parse("$homePage${newsArticle.url}")); + var response = await http.get(Uri.parse(newsArticle.url)); if (response.statusCode == 200) { Document document = html_parser.parse(utf8.decode(response.bodyBytes)); - Element? articleElement = document.querySelector(".article_section"); - String? thumbnail = articleElement - ?.querySelector("p img") - ?.attributes["src"]; + Element? articleElement = document.querySelector(".articleBody"); + String? thumbnail = ""; String? content = articleElement?.innerHtml; return newsArticle.fill(content: content, thumbnail: thumbnail,); } @@ -37,7 +35,7 @@ class BleepingComputer extends Publisher { @override Future> categoryArticles({String category = "", int page = 1}) async { Set articles = {}; - var response = await http.get(Uri.parse("$homePage/news/page/$page")); + var response = await http.get(Uri.parse(page!=1?"$homePage/news/page/$page":"$homePage/news/")); if (response.statusCode == 200) { Document document = html_parser.parse(utf8.decode(response.bodyBytes)); List articleElements = @@ -55,7 +53,7 @@ class BleepingComputer extends Publisher { String? time = articleElement.querySelector(".bc_news_time")?.text; String parsedTime = convertToIso8601("$date $time", "MMMM dd, yyyy hh:mm a"); - NewsArticle( + articles.add(NewsArticle( this, title ?? "", content, @@ -64,7 +62,7 @@ class BleepingComputer extends Publisher { url ?? "", thumbnail ?? "", parseDateString(parsedTime), - ); + )); } } return articles; diff --git a/lib/extractor/technology/engadget.dart b/lib/extractor/technology/engadget.dart index d836381..f45ee85 100644 --- a/lib/extractor/technology/engadget.dart +++ b/lib/extractor/technology/engadget.dart @@ -18,7 +18,7 @@ class Engadget extends Publisher { var response = await http.get(Uri.parse("$homePage${newsArticle.url}")); if (response.statusCode == 200) { Document document = html_parser.parse(utf8.decode(response.bodyBytes)); - Element? articleElement = document.querySelector(".article_section"); + Element? articleElement = document.querySelector(".caas-body"); String? thumbnail = articleElement ?.querySelector("p img") ?.attributes["src"]; @@ -43,7 +43,8 @@ class Engadget extends Publisher { @override Future> categoryArticles({String category = "news", int page = 1}) async { Set articles = {}; - var response = await http.get(Uri.parse("$homePage/$category/page/$page")); + if(category=="/") category = "/news"; + var response = await http.get(Uri.parse("$homePage$category/page/$page")); if (response.statusCode == 200) { Document document = html_parser.parse(utf8.decode(response.bodyBytes)); List articleElements = @@ -53,14 +54,14 @@ class Engadget extends Publisher { String? excerpt = articleElement.querySelector("h2+div")?.text; String? authorTime = articleElement.querySelector("div[class*=engadgetFontDarkGray]")?.text; String? author = authorTime?.split(",")[0].replaceFirst("By ", ""); - String? date = authorTime?.split(",")[1]; + String? date = authorTime?.split(",").last.trim(); String? url = articleElement.querySelector("h2 a")?.attributes["href"]; String? thumbnail = articleElement .querySelector("img[width]") ?.attributes["src"]; String parsedTime = convertToIso8601("$date", "MM.dd.yyyy"); - NewsArticle( + articles.add(NewsArticle( this, title ?? "", "", @@ -69,7 +70,7 @@ class Engadget extends Publisher { url ?? "", thumbnail ?? "", parseDateString(parsedTime), - ); + )); } } return articles; @@ -94,7 +95,7 @@ class Engadget extends Publisher { ?.attributes["src"]; String parsedTime = convertToIso8601("$date", "MM.dd.yyyy"); - NewsArticle( + articles.add(NewsArticle( this, title ?? "", "", @@ -103,7 +104,7 @@ class Engadget extends Publisher { url ?? "", thumbnail ?? "", parseDateString(parsedTime), - ); + )); } } return articles; diff --git a/test/extractor/technology/bleepingcomputer.dart b/test/extractor/technology/bleepingcomputer.dart index d990038..669cee6 100644 --- a/test/extractor/technology/bleepingcomputer.dart +++ b/test/extractor/technology/bleepingcomputer.dart @@ -8,11 +8,17 @@ void main() { Publisher publisher = BleepingComputer(); test('Extract Categories Test', () async { - await ExtractorTest.categoriesTest(publisher); + }); test('Category Articles Test', () async { - await ExtractorTest.categoryArticlesTest(publisher); + final categoryArticles = await publisher.categoryArticles(category: "", page: 1); + + expect(categoryArticles, isNotEmpty); + + var article = categoryArticles.first; + expect(article?.title, isNotEmpty); + expect(article?.publishedAt.value, isNot(0)); }); test('Search Articles Test', () async { From 444ae80782e04de411ed97fd9f024a8131095cf4 Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 17:02:52 +0530 Subject: [PATCH 12/16] add fallback when parsing fails --- lib/utils/time.dart | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/utils/time.dart b/lib/utils/time.dart index 17047ee..410465c 100644 --- a/lib/utils/time.dart +++ b/lib/utils/time.dart @@ -41,7 +41,11 @@ MapEntry parseDateString(String timestamp) { } String convertToIso8601(String inputTime, String inputFormatString) { - DateFormat inputFormat = DateFormat(inputFormatString); - DateTime parsedTime = inputFormat.parse(inputTime); - return parsedTime.toString(); + try { + DateFormat inputFormat = DateFormat(inputFormatString); + DateTime parsedTime = inputFormat.parse(inputTime); + return parsedTime.toString(); + } catch (e) { + return inputTime; + } } From 86dfa957b85f17f9f294b3d36de976056c635992 Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 17:03:51 +0530 Subject: [PATCH 13/16] simplify category names --- lib/pages/subscription.dart | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pages/subscription.dart b/lib/pages/subscription.dart index ab76641..7bb0b71 100644 --- a/lib/pages/subscription.dart +++ b/lib/pages/subscription.dart @@ -100,7 +100,7 @@ class _SubscriptionsPageState extends State with AutomaticKee String getSelectedCategories(String newsSource) { var categories = Store.selectedSubscriptions .where((element) => element.publisher==newsSource) - .map((e) => e.category) + .map((e) => e.category!="/"?e.category.split("/").last:e.category) .join(", "); return categories; } From 551c3bff5068dd788dc1c87cb0cec76e581a246b Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 17:04:07 +0530 Subject: [PATCH 14/16] add null check --- lib/pages/full_article.dart | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pages/full_article.dart b/lib/pages/full_article.dart index 00c1f17..ee26bcc 100644 --- a/lib/pages/full_article.dart +++ b/lib/pages/full_article.dart @@ -157,7 +157,7 @@ class HtmlWidget extends StatelessWidget { var src = extensionContext.attributes.containsKey("data-lazy-src") ? "data-lazy-src" : "src"; - return Network.shouldLoadImage(extensionContext.attributes[src]!) + return extensionContext.attributes[src]!=null && Network.shouldLoadImage(extensionContext.attributes[src]!) ? CachedNetworkImage( imageUrl: extensionContext.attributes[src]!, progressIndicatorBuilder: (context, url, downloadProgress) { From 485246aaadd3c51767815700a99c768e4a8ac241 Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 17:05:32 +0530 Subject: [PATCH 15/16] test searchedArticles only when publisher has search support --- test/extractor/common.dart | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/test/extractor/common.dart b/test/extractor/common.dart index 9c4ba59..f4bf794 100644 --- a/test/extractor/common.dart +++ b/test/extractor/common.dart @@ -23,14 +23,16 @@ class ExtractorTest { } static Future searchedArticlesTest(Publisher publisher, String query) async { - final searchArticles = - await publisher.searchedArticles(searchQuery: 'world', page: 1); + if (publisher.hasSearchSupport) { + final searchArticles = + await publisher.searchedArticles(searchQuery: query, page: 1); - expect(searchArticles, isNotEmpty); + expect(searchArticles, isNotEmpty); - var article = searchArticles.first; - expect(article, isA()); - expect(article?.title, isNotEmpty); - expect(article?.publishedAt.value, isNot(0)); + var article = searchArticles.first; + expect(article, isA()); + expect(article?.title, isNotEmpty); + expect(article?.publishedAt.value, isNot(0)); + } } } \ No newline at end of file From 863659254c0267229e688520ccedd013cfac6980 Mon Sep 17 00:00:00 2001 From: kshib Date: Mon, 1 Jan 2024 17:10:27 +0530 Subject: [PATCH 16/16] v0.1.1 --- pubspec.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pubspec.yaml b/pubspec.yaml index 4b18fc1..08b3ee6 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -2,7 +2,7 @@ name: whapp description: "News aggregator" publish_to: 'none' -version: 0.1.0+1 +version: 0.1.1+1 environment: sdk: '>=3.2.1 <4.0.0'