-
Notifications
You must be signed in to change notification settings - Fork 18
/
UrlsToMergedPdf.linq
100 lines (83 loc) · 3.14 KB
/
UrlsToMergedPdf.linq
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
<Query Kind="Program">
<NuGetReference Version="2.0.0-alpha0002" Prerelease="true">Gotenberg.Sharp.API.Client</NuGetReference>
<Namespace>Gotenberg.Sharp.API.Client</Namespace>
<Namespace>Gotenberg.Sharp.API.Client.Domain.Builders</Namespace>
<Namespace>Gotenberg.Sharp.API.Client.Domain.Builders.Faceted</Namespace>
<Namespace>Gotenberg.Sharp.API.Client.Domain.Requests</Namespace>
<Namespace>System.Net.Http</Namespace>
<Namespace>System.Threading.Tasks</Namespace>
</Query>
// NOTE: You need to increase the Gotenberg API's timeout for this to work,
// by passing --api-timeout=1800s when running the container.
// Shared generator used to append a random suffix to output file names.
// The parameterless constructor chooses its own seed. A hand-rolled seed such as
// Math.Abs((int)DateTime.Now.Ticks) can throw OverflowException when the
// truncated tick count happens to equal int.MinValue.
static readonly Random Rand = new Random();
/// <summary>
/// Script entry point: creates the merged news PDF, launches the resulting
/// file through the OS shell, and dumps its path to the LINQPad output.
/// </summary>
async Task Main()
{
    const string archiveDirectory = @"D:\NewsArchive";

    var pdfPath = await CreateWorldNewsSummary(archiveDirectory);

    // UseShellExecute = true hands the file to the OS shell instead of
    // trying to run it as an executable.
    Process.Start(new ProcessStartInfo(pdfPath) { UseShellExecute = true });

    pdfPath.Dump("Done");
}
/// <summary>
/// Builds one URL-to-PDF request per site in a fixed list of news sites,
/// runs them, and merges the results into a single file.
/// </summary>
/// <param name="destinationDirectory">Directory passed on as the location for the merged PDF.</param>
/// <returns>The path returned by <c>ExecuteRequestsAndMerge</c> for the merged file.</returns>
public async Task<string> CreateWorldNewsSummary(string destinationDirectory)
{
    string[] addresses =
    {
        "https://www.nytimes.com",
        "https://www.axios.com/",
        "https://www.cnn.com",
        "https://www.csmonitor.com",
        "https://www.wsj.com",
        "https://www.usatoday.com",
        "https://www.irishtimes.com",
        "https://www.lemonde.fr",
        "https://calgaryherald.com",
        "https://www.bbc.com/news/uk",
        "https://english.elpais.com/",
        "https://www.thehindu.com",
        "https://www.theaustralian.com.au",
        "https://www.welt.de",
        "https://www.cankaoxiaoxi.com",
        "https://www.novinky.cz",
        "https://www.elobservador.com.uy"
    };

    // The whole pipeline is deferred: URIs, builders and requests are only
    // created once the request sequence is enumerated.
    var siteUris = from address in addresses
                   select new Uri(address);
    var pendingRequests = CreateRequestBuilders(siteUris).Select(builder => builder.Build());

    var mergedPdfPath = await ExecuteRequestsAndMerge(pendingRequests, destinationDirectory);
    return mergedPdfPath;
}
/// <summary>
/// Lazily yields one configured <see cref="UrlRequestBuilder"/> for each URI.
/// </summary>
/// <param name="uris">Addresses of the pages to build requests for.</param>
/// <returns>A deferred sequence of builders, one per element of <paramref name="uris"/>, in the same order.</returns>
IEnumerable<UrlRequestBuilder> CreateRequestBuilders(IEnumerable<Uri> uris)
{
    // The same settings are applied to every page: EmulateAsScreen, the client
    // type name as user agent, page range "1-2", and Margins.None with the
    // left/right margins then set to .3.
    UrlRequestBuilder ConfigureFor(Uri target) =>
        new UrlRequestBuilder()
            .SetUrl(target)
            .SetConversionBehaviors(behaviors =>
                behaviors.EmulateAsScreen()
                    .SetUserAgent(nameof(GotenbergSharpClient)))
            .ConfigureRequest(config =>
            {
                config.SetPageRanges("1-2");
            })
            .WithDimensions(dimensions =>
            {
                dimensions.SetMargins(Margins.None)
                    .MarginLeft(.3)
                    .MarginRight(.3);
            });

    foreach (var pageUri in uris)
    {
        yield return ConfigureFor(pageUri);
    }
}
/// <summary>
/// Starts a URL-to-PDF conversion for every request against the API at
/// http://localhost:3000, waits for all of them, merges the results into one
/// document, and writes that document to disk.
/// </summary>
/// <param name="requests">Requests to execute; the sequence is enumerated once.</param>
/// <param name="destinationDirectory">Directory passed on as the location for the merged PDF.</param>
/// <returns>The path returned by <c>WriteFileAndGetPath</c> for the merged file.</returns>
async Task<string> ExecuteRequestsAndMerge(IEnumerable<UrlRequest> requests, string destinationDirectory)
{
    // The client is disposed only when this block exits, i.e. after the merged
    // response has been completely copied to disk.
    using (var innerClient = new HttpClient
    {
        BaseAddress = new Uri("http://localhost:3000"),
        Timeout = TimeSpan.FromMinutes(7)
    })
    {
        var sharpClient = new GotenbergSharpClient(innerClient);

        // All conversions are started while WhenAll enumerates the sequence,
        // then awaited as a group.
        var tasks = requests.Select(r => sharpClient.UrlToPdfAsync(r, CancellationToken.None));
        var results = await Task.WhenAll(tasks);

        try
        {
            var mergeBuilder = new MergeBuilder()
                .WithAssets(b =>
                {
                    // Each intermediate PDF is registered under an index-based name.
                    b.AddItems(results.Select((r, i) => KeyValuePair.Create($"{i}.pdf", r)));
                });

            using (var response = await sharpClient.MergePdfsAsync(mergeBuilder.Build()))
            {
                return await WriteFileAndGetPath(response, destinationDirectory);
            }
        }
        finally
        {
            // The per-URL PDFs are no longer needed once the merge has finished or failed.
            foreach (var pdf in results)
            {
                pdf?.Dispose();
            }
        }
    }
}
/// <summary>
/// Copies the given stream into a new, date-stamped .pdf file inside the
/// destination directory.
/// </summary>
/// <param name="responseStream">Stream whose content is written to the file; it is read but not disposed here.</param>
/// <param name="desinationDirectory">Target directory; it is created if it does not exist yet.</param>
/// <returns>The path of the file that was written.</returns>
async Task<string> WriteFileAndGetPath(Stream responseStream, string desinationDirectory)
{
    // File.Create does not create missing folders, so make sure the target exists first.
    Directory.CreateDirectory(desinationDirectory);

    // Date prefix plus a random suffix, so runs on the same day are unlikely to collide.
    var fileName = $"{DateTime.Now:yyyy-MM-MMMM-dd}-{Rand.Next()}.pdf";
    var fullPath = Path.Combine(desinationDirectory, fileName);

    using (var destinationStream = File.Create(fullPath))
    {
        await responseStream.CopyToAsync(destinationStream);
    }

    return fullPath;
}