Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
ksh-b committed Jan 1, 2024
2 parents 525b577 + 8636592 commit 5ddef7b
Show file tree
Hide file tree
Showing 27 changed files with 701 additions and 350 deletions.
31 changes: 11 additions & 20 deletions lib/extractor/general/national/india/thewire.dart
Original file line number Diff line number Diff line change
Expand Up @@ -39,27 +39,16 @@ class TheWire extends Publisher {
}

@override
Future<NewsArticle?> article(String url) async {
var response = await http
.get(Uri.parse('$homePage$url'));
Future<NewsArticle?> article(NewsArticle newsArticle) async {
var response = await http.get(Uri.parse('$homePage${newsArticle.url}'));
if (response.statusCode == 200) {
var data = json.decode(response.body);
var postDetail = data["post-detail"][0];
var title = postDetail["post_title"];
var article = postDetail["post_content"];
var author = postDetail["post_author_name"][0]["author_name"];
var excerpt = postDetail["post_excerpt"];
var content = postDetail["post_content"];
var thumbnail = postDetail["featured_image"][0];
var time = postDetail["post_date_gmt"];
return NewsArticle(
this,
title ?? "",
article ?? "",
excerpt ?? "",
author ?? "",
url,
thumbnail ?? "",
parseDateString(time?.trim() ?? ""),
return newsArticle.fill(
content: content,
thumbnail: thumbnail,
);
}
return null;
Expand All @@ -84,9 +73,10 @@ class TheWire extends Publisher {
for (var element in data) {
var title = element['post_title'];
var author = element['post_author_name'][0]["author_name"];
var thumbnail = element['hero_image'][0]; //element['thumbnail']['url'];
var thumbnail = element['hero_image'][0];
var time = element["post_date_gmt"];
var articleUrl = '/wp-json/thewire/v2/posts/detail/${element['post_name']}';
var articleUrl =
'/wp-json/thewire/v2/posts/detail/${element['post_name']}';
var excerpt = element['post_excerpt'];
articles.add(NewsArticle(
this,
Expand All @@ -109,7 +99,8 @@ class TheWire extends Publisher {
if (category == '/') {
category = 'home';
}
String apiUrl = '$homePage/wp-json/thewire/v2/posts/$category/recent-stories?page=$page&per_page=10';
String apiUrl =
'$homePage/wp-json/thewire/v2/posts/$category/recent-stories?page=$page&per_page=10';
return extract(apiUrl, false);
}

Expand Down
30 changes: 5 additions & 25 deletions lib/extractor/general/world/aljazeera.dart
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import 'package:intl/intl.dart';
import 'package:whapp/model/article.dart';
import 'package:whapp/model/publisher.dart';
import 'dart:convert';
Expand Down Expand Up @@ -30,38 +29,19 @@ class AlJazeera extends Publisher {
}

@override
Future<NewsArticle?> article(String url) async {
var response = await http.get(Uri.parse('$homePage$url'));
Future<NewsArticle?> article(NewsArticle newsArticle) async {
var response = await http.get(Uri.parse('$homePage${newsArticle.url}'));
if (response.statusCode == 200) {
var document = html_parser.parse(utf8.decode(response.bodyBytes));

var article = document.getElementById("main-content-area");

var titleElement = article?.querySelector('h1');
var excerptElement = article?.querySelector('em');
var thumbnailElement = article?.querySelector('img');
var articleElement = article?.querySelector('.wysiwyg');
var authorElement = article?.querySelector('.author-link');
var timeElement = article?.querySelector('.date-simple span[aria-hidden]');
var title = titleElement?.text;
var content = articleElement?.text;
var author = authorElement?.text;
var excerpt = excerptElement?.text;
var thumbnail = "$homePage${thumbnailElement?.attributes["src"]}";
var time = timeElement?.text;

if (time!=null) {
time = DateFormat('d MMM yyyy').parse(time).toString();
}
return NewsArticle(
this,
title ?? "",
content ?? "",
excerpt ?? "",
author ?? "",
url,
thumbnail,
parseDateString(time?.trim() ?? ""),
return newsArticle.fill(
content: content,
thumbnail: thumbnail,
);
}
return null;
Expand Down
23 changes: 12 additions & 11 deletions lib/extractor/general/world/bbc.dart
Original file line number Diff line number Diff line change
Expand Up @@ -157,30 +157,31 @@ class BBC extends Publisher {
}

@override
Future<NewsArticle?> article(String url) async {
var response = await http.get(Uri.parse("$homePage$url"));
Future<NewsArticle?> article(NewsArticle newsArticle) async {
var response = await http.get(Uri.parse("$homePage${newsArticle.url}"));
if (response.statusCode == 200) {
var document = html_parser.parse(utf8.decode(response.bodyBytes));

var titleElement = document.querySelector('article h1');
var articleElement = document.querySelectorAll('article p');
var excerptElement = document.querySelector('article div b');
var timeElement = document.querySelector('article time');
var thumbnailElement = document.querySelector('article img');
var authorElement = document.querySelector("article div[class*=TextContributorName]");
var article = document.querySelector('.article__main:nth-child(1)');
var titleElement = article?.querySelector('h1');
var excerptElement = article?.querySelector('div b');
var timeElement = article?.querySelector('time');
var thumbnailElement = article?.querySelector('img');
var authorElement = article?.querySelector("div[class*=TextContributorName]");
var title = titleElement?.text;
var article = articleElement.sublist(1).map((e) => "<p>${e.text}</p>").join();
var content = article?.innerHtml;
var author = authorElement?.text.replaceFirst("By ", "");
var excerpt = excerptElement?.text;
var thumbnail = thumbnailElement?.attributes["src"];
var time = timeElement?.attributes["datetime"];

return NewsArticle(
this,
title ?? "",
article,
content ?? "",
excerpt ?? "",
author ?? "",
url,
newsArticle.url,
thumbnail ?? "",
parseDateString(time?.trim() ?? ""),
);
Expand Down
111 changes: 111 additions & 0 deletions lib/extractor/general/world/nitter.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import 'package:html/dom.dart';
import 'package:intl/intl.dart';
import 'package:whapp/model/article.dart';
import 'package:whapp/model/publisher.dart';
import 'dart:convert';
import 'package:http/http.dart' as http;
import 'package:html/parser.dart' as html_parser;
import 'package:whapp/utils/time.dart';

/// Publisher that scrapes tweets from the Nitter instance at [homePage].
///
/// The "category" passed to [categoryArticles] and [extract] is used as the
/// first path segment of the search URL (`$homePage/$category/search`).
/// NOTE(review): presumably this is an account handle — confirm with callers.
///
/// Paging is date based: every page covers one seven-day window, see
/// [generateWeekDates].
class Nitter extends Publisher {
  /// Not read or written anywhere inside this class.
  ///
  /// NOTE(review): looks like a leftover / placeholder for cursor-based
  /// paging — confirm with other users of this field before removing it.
  Map nextCursor = {};

  @override
  String get homePage => "https://nitter.net";

  @override
  String get name => "Nitter";

  /// No predefined categories are offered; always resolves to an empty map.
  @override
  Future<Map<String, String>> get categories async => {};

  /// Fetches the tweets found under [category] for the week selected by
  /// [page], optionally narrowed down by the free-text [query].
  ///
  /// Returns an empty set when the server does not answer with HTTP 200.
  /// The returned articles carry an empty content and thumbnail; [article]
  /// fills in the content later.
  Future<Set<NewsArticle?>> extract(String category, int page,
      {String query = ""}) async {
    Set<NewsArticle?> articles = {};
    var dates = generateWeekDates(page);
    // Encode the free-text query so that characters such as '&', '+' or
    // spaces cannot corrupt the `q` parameter or inject further parameters.
    var encodedQuery = Uri.encodeQueryComponent(query);
    var response = await http.get(
      Uri.parse("$homePage/$category/search?f=tweets&q=$encodedQuery"
          "&since=${dates[0]}&until=${dates[1]}"),
      headers: {
        'Host': 'nitter.net',
        'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; rv:121.0) Gecko/20100101 Firefox/121.0',
        'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
      },
    );
    if (response.statusCode != 200) {
      return articles;
    }

    Document document = html_parser.parse(utf8.decode(response.bodyBytes));
    for (Element articleElement in document.querySelectorAll(".tweet-body")) {
      // The tweet text doubles as excerpt; its first line is the title.
      String? text = articleElement.querySelector(".tweet-content")?.text;
      String? author = articleElement.querySelector(".username")?.text;
      Element? dateAnchor = articleElement.querySelector(".tweet-date a");
      String? date = dateAnchor?.attributes["title"];

      // Interpolating a null href would produce the bogus URL
      // "https://nitter.netnull". Fall back to the timestamp anchor's href
      // and finally to the bare home page instead.
      // NOTE(review): assumes the timestamp anchor links to the tweet —
      // confirm against the instance's markup.
      String url =
          articleElement.querySelector(".tweet-link")?.attributes["href"] ??
              dateAnchor?.attributes["href"] ??
              "";

      // Without a timestamp, hand an empty string to parseDateString (as the
      // other extractors do) instead of trying to convert the text "null".
      String parsedTime = date == null
          ? ""
          : convertToIso8601(date, "MMM d, yyyy · h:mm a UTC");

      articles.add(NewsArticle(
        this,
        text?.split("\n").first ?? "",
        "",
        text ?? "",
        author ?? "",
        "$homePage$url",
        "",
        parseDateString(parsedTime),
      ));
    }
    return articles;
  }

  /// Returns the tweets of [category] for the given [page].
  ///
  /// An empty category or "/" yields an empty set without any request.
  @override
  Future<Set<NewsArticle?>> categoryArticles(
      {String category = "", int page = 1}) async {
    if (category.isEmpty || category == "/") return {};
    return extract(category, page);
  }

  /// Searches within a category; [searchQuery] must look like
  /// `<category>#<query>`.
  ///
  /// Queries without a '#' yield an empty set. Only the text between the
  /// first and the second '#' is used as query; anything after a second '#'
  /// is ignored.
  @override
  Future<Set<NewsArticle?>> searchedArticles(
      {required String searchQuery, int page = 1}) async {
    if (!searchQuery.contains("#")) return {};
    var parts = searchQuery.split("#");
    return extract(parts[0], page, query: parts[1]);
  }

  /// Downloads the page behind [newsArticle] and fills in its content with
  /// the text of the first `.tweet-body` element (empty if none is found).
  ///
  /// Returns `null` when the server does not answer with HTTP 200.
  @override
  Future<NewsArticle?> article(NewsArticle newsArticle) async {
    var response = await http.get(Uri.parse(newsArticle.url));
    if (response.statusCode == 200) {
      Document document = html_parser.parse(utf8.decode(response.bodyBytes));
      return newsArticle.fill(
          content: document.querySelector(".tweet-body")?.text ?? "");
    }
    return null;
  }

  /// Returns `[since, until]` as `yyyy-MM-dd` strings for the seven-day
  /// window belonging to [page].
  ///
  /// Page 1 ends at the current date; every following page moves the window
  /// another seven days into the past.
  List<String> generateWeekDates(int page) {
    final formatter = DateFormat('yyyy-MM-dd');
    // For page 1 this subtracts zero days, i.e. the window ends "now".
    final untilDate =
        DateTime.now().subtract(Duration(days: 7 * (page - 1)));
    final sinceDate = untilDate.subtract(const Duration(days: 7));
    return [formatter.format(sinceDate), formatter.format(untilDate)];
  }
}
25 changes: 5 additions & 20 deletions lib/extractor/general/world/reuters.dart
Original file line number Diff line number Diff line change
Expand Up @@ -33,29 +33,14 @@ class Reuters extends Publisher {
}

@override
Future<NewsArticle?> article(String url) async {
var response = await http.get(Uri.parse('https://neuters.de$url'));
Future<NewsArticle?> article(NewsArticle newsArticle) async {
var response = await http.get(Uri.parse('https://neuters.de${newsArticle.url}'));
if (response.statusCode == 200) {
var document = html_parser.parse(utf8.decode(response.bodyBytes));

var titleElement = document.querySelector('h1');
var articleElement = document.querySelectorAll('p:not(.byline)');
var timeAuthorElement = document.querySelector('.byline');
var title = titleElement?.text;
var article = articleElement.map((e) => e.text).join("\n");
var author = timeAuthorElement?.text.split(" - ")[1];
var excerpt = "";
var thumbnail = "";
var time = timeAuthorElement?.text.split(" - ")[0];
return NewsArticle(
this,
title ?? "",
article,
excerpt,
author ?? "",
url,
thumbnail,
parseDateString(time?.trim() ?? ""),
var content = articleElement.map((e) => e.text).join("\n");
return newsArticle.fill(
content: content,
);
}
return null;
Expand Down
98 changes: 98 additions & 0 deletions lib/extractor/technology/arstechnica.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import 'package:html/dom.dart';
import 'package:whapp/model/article.dart';
import 'package:whapp/model/publisher.dart';
import 'dart:convert';
import 'package:http/http.dart' as http;
import 'package:html/parser.dart' as html_parser;
import 'package:whapp/utils/time.dart';

/// Publisher that scrapes article listings and article bodies from
/// arstechnica.com.
class ArsTechnica extends Publisher {
  @override
  String get homePage => "https://arstechnica.com";

  @override
  String get name => "Ars Technica";

  /// Downloads the page behind [newsArticle] and fills in its content with
  /// the inner HTML of the `.article-content` element.
  ///
  /// Returns `null` when the server does not answer with HTTP 200.
  @override
  Future<NewsArticle?> article(NewsArticle newsArticle) async {
    var response = await http.get(Uri.parse(newsArticle.url));
    if (response.statusCode != 200) {
      return null;
    }
    Document document = html_parser.parse(utf8.decode(response.bodyBytes));
    String? content = document.querySelector(".article-content")?.innerHtml;
    // No thumbnail is extracted from the article page, so none is passed
    // (previously a hard-coded empty string was handed over).
    // NOTE(review): presumably fill() keeps the thumbnail from the listing
    // when the argument is omitted — verify against NewsArticle.fill.
    return newsArticle.fill(content: content);
  }

  /// Maps the display name of each section to the path segment used in
  /// listing URLs; the empty segment addresses the front page listing.
  @override
  Future<Map<String, String>> get categories async => {
        "News": "",
        "Reviews": "reviews",
        "Guides": "guides",
        "Gaming": "gaming",
        "Gear": "gear",
        "Entertainment": "entertainment",
        "Tomorrow": "tomorrow",
        "Deals": "deals",
      };

  @override
  bool get hasSearchSupport => false;

  /// Scrapes one listing [page] of [category] (a path segment from
  /// [categories]; empty for the front page).
  ///
  /// Returns an empty set when the server does not answer with HTTP 200.
  /// The returned articles have no content yet; [article] fills it in.
  @override
  Future<Set<NewsArticle?>> categoryArticles(
      {String category = "", int page = 1}) async {
    // Typed like the declared return value so the set handed to callers has
    // the element type they expect.
    Set<NewsArticle?> articles = {};
    if (category.isNotEmpty) {
      category = "/$category";
    }

    var response = await http.get(Uri.parse("$homePage$category/page/$page"));
    if (response.statusCode != 200) {
      return articles;
    }

    Document document = html_parser.parse(utf8.decode(response.bodyBytes));
    for (Element articleElement in document.querySelectorAll(".article")) {
      Element? headline = articleElement.querySelector("h2 a");
      String? excerpt = articleElement.querySelector(".excerpt")?.text;
      String? author =
          articleElement.querySelector("span[itemprop=name]")?.text;
      String date =
          articleElement.querySelector("time")?.attributes["datetime"] ?? "";
      // The thumbnail is read from the inline style of the figure's div,
      // see extractUrl.
      String? thumbnailStyle =
          articleElement.querySelector("figure div")?.attributes["style"];

      articles.add(NewsArticle(
        this,
        headline?.text ?? "",
        "",
        excerpt ?? "",
        author ?? "",
        headline?.attributes["href"] ?? "",
        extractUrl(thumbnailStyle),
        parseDateString(date),
      ));
    }
    return articles;
  }

  /// Returns the target of the first CSS `url(...)` found in [inputString].
  ///
  /// Single-quoted, double-quoted and unquoted forms are recognised. Returns
  /// an empty string when [inputString] is `null` or contains no `url(...)`.
  String extractUrl(String? inputString) {
    if (inputString == null) {
      return "";
    }
    // Group 1 captures the optional opening quote and the back-reference \1
    // requires the same character to close it; group 2 is the URL itself.
    final regExp = RegExp(r'''url\(\s*(['"]?)(.*?)\1\s*\)''', dotAll: true);
    return regExp.firstMatch(inputString)?.group(2) ?? "";
  }

  /// Search is not supported (see [hasSearchSupport]); always returns an
  /// empty set.
  @override
  Future<Set<NewsArticle?>> searchedArticles(
      {required String searchQuery, int page = 1}) async {
    return {};
  }
}
Loading

0 comments on commit 5ddef7b

Please sign in to comment.