-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate-hourly.pl
170 lines (143 loc) · 4.72 KB
/
generate-hourly.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#! /usr/bin/env perl
use Modern::Perl '2015';
###
use Getopt::Long;
use Template;
use FindBin qw/$Bin/;
use utf8;
use Time::HiRes qw/gettimeofday tv_interval/;
use Data::Dumper;
use HNLOlib qw/get_dbh get_all_sets $feeds update_scores $sql sec_to_dhms/;
use List::Util qw/all/;
use open qw/ :std :encoding(utf8) /;
binmode(STDOUT, ":encoding(UTF-8)");
# Command-line switches:
#   --update_score   when given, the collected entries are refreshed through
#                    HNLOlib::update_from_list before the page is rendered.
my $update_score;

# GetOptions returns false (after printing its own warning) when it meets an
# unknown or malformed option.  Stop right away instead of silently running
# with a mistyped switch ignored.
GetOptions( 'update_score' => \$update_score )
  or die "Usage: $0 [--update_score]\n";
### Tunables
my $debug              = 0;    # true: print timestamped progress lines
my $page_title         = 'HN&&LO recent links';
my $no_of_days_to_show = 3;    # retention window, converted to seconds later
my $ratio_limit        = 9;    # threshold used when deciding to set a ratio
my $ua;                        # declared here, not referenced in this script

### Database handle
my $dbh = get_dbh();
$dbh->{sqlite_unicode} = 1;

### Run-time bookkeeping
my $now = time;                # reference time for the retention filter
my $t0  = [gettimeofday];      # start mark handed to tv_interval()
my $generation_log;            # HTML snippet, one "<br />" line per stage
# Load every stored set via the get_pairs_10d statement.
if ($debug) {
    say gmtime . " starting, fetching 10d data... ";
}

# get_all_sets() hands back a hash reference; %sets is a shallow copy of it.
my $sth  = $dbh->prepare( $sql->{get_pairs_10d} );
my %sets = %{ get_all_sets($sth) };

if ($debug) {
    say gmtime . " got all sets... ";
}
$generation_log .= sec_to_dhms( tv_interval($t0) ) . ' - got all sets<br />';
# Coerce the URL-keyed %sets hash into the list @pairs, newest first_seen
# first, keeping only sets that have a sequence with at least one entry
# inside the retention window.  While walking the kept sets, record the
# lowest and highest HN item id so the rank query can be limited to that
# id range.
my @pairs;
my $limit_seconds = $no_of_days_to_show * 24 * 3600;

# Both bounds start undefined and are taken from the data.  A fixed start
# value for the minimum only works while every id is below it; once ids grow
# past the constant the minimum would stay stuck there and the queried range
# would keep widening.
my ( $min_hn_id, $max_hn_id );

my @urls_newest_first =
  sort { $sets{$b}->{first_seen} <=> $sets{$a}->{first_seen} } keys %sets;

foreach my $url (@urls_newest_first) {
    my $set = $sets{$url};

    # filter single entries -- checked before the sequence is dereferenced,
    # otherwise the check can never fire
    next unless exists $set->{sequence};
    my $sequence = $set->{sequence};

    # filter entries older than the retention time; all() is also true for
    # an empty sequence, so empty sets are dropped here as well
    next if all { $now - $_->{time} > $limit_seconds } @{$sequence};

    for my $entry ( @{$sequence} ) {
        next unless $entry->{tag} eq 'hn';
        my $id = $entry->{id};
        $min_hn_id = $id if !defined($min_hn_id) || $id < $min_hn_id;
        $max_hn_id = $id if !defined($max_hn_id) || $id > $max_hn_id;
    }
    push @pairs, $set;
}

# No HN entry was kept: fall back to the historical start values, which form
# an empty range (min > max), so the rank query still gets defined numbers.
$min_hn_id //= 27_076_741 + 10_000_000;
$max_hn_id //= -1;

say gmtime . " got all pairs... " if $debug;
$generation_log .= sec_to_dhms( tv_interval($t0) ) . ' - got all pairs after<br />';
# Run rank_sql for the collected HN id range and build %ranks.  Column 0 of
# each row is used as the item id, column 1 as a rank; when an id shows up in
# several rows the numerically smallest rank is the one that is kept.
$sth = $dbh->prepare( $sql->{rank_sql} );
$sth->execute( $min_hn_id, $max_hn_id );
my $hn_rank = $sth->fetchall_arrayref();

my %ranks;
for my $row ( @{$hn_rank} ) {
    my ( $id, $rank ) = @{$row};

    # first sighting of this id, or a smaller rank than the one stored
    my $is_improvement = !exists $ranks{$id} || $rank < $ranks{$id};
    $ranks{$id} = $rank if $is_improvement;
}
# NOTE: everything from =pod to =cut below is POD, so perl skips it -- the
# hn_queue seeding code is currently disabled and never runs.
# NOTE(review): if it is re-enabled, $hn_id_list is still undef when no entry
# qualified, and `scalar @$hn_id_list` would then likely die under strict
# refs -- confirm and initialise it to [] first.
=pod
## update the queue store with some entries from HN
my $queue_href= $dbh->selectall_hashref( "select id from hn_queue",'id');
my $hn_id_list;
for my $pair( @pairs) {
for my $entry (@{$pair->{sequence}}) {
if ($entry->{tag} eq 'hn' and !exists $queue_href->{$entry->{id}} and ($entry->{rank} and $entry->{rank}<=30)) {
say "added $entry->{id} $entry->{title} to queue";
push @{$hn_id_list}, $entry->{id};
}
}
}
if (scalar @$hn_id_list > 0) {
$sth=$dbh->
prepare("insert into hn_queue (id, age, retries) values (?,?,3001)");
my $offset =0;
for my $id(@{$hn_id_list}) {
my $age = time + 2 * 3_600 + 3_600/2 + $offset;
warn "$id not in queue, adding with age of $age";
$sth->execute( $id, $age ) ;
$offset += 5;
}
}
=cut
# Optional refresh pass (only with --update_score): group the ids of all
# kept entries by their tag and hand each group to HNLOlib::update_from_list,
# tags processed in sorted order.
if ($update_score) {
    my %ids_by_tag;
    for my $pair (@pairs) {
        for my $entry ( @{ $pair->{sequence} } ) {
            push @{ $ids_by_tag{ $entry->{tag} } }, $entry->{id};
        }
    }

    for my $tag ( sort keys %ids_by_tag ) {
        my $ids = $ids_by_tag{$tag};
        say "updating entries from $tag. No. of IDs: ", scalar @{$ids};
        HNLOlib::update_from_list( $tag, $ids );
    }
}
# Annotate every entry of every kept set:
#   ratio - comments / |score| with two decimals, when the score is non-zero
#           and |score| + comments exceeds $ratio_limit;
#           the fixed value 100 when the score is zero and the comment count
#           alone exceeds $ratio_limit;
#           otherwise the key is left unset.
#   rank  - copied from %ranks for 'hn' entries that have a true rank there.
for my $pair (@pairs) {
    for my $entry ( @{ $pair->{sequence} } ) {
        my ( $score, $comments ) = @{$entry}{qw/score comments/};

        if ( $score != 0 and abs($score) + $comments > $ratio_limit ) {
            $entry->{ratio} = sprintf( '%.02f', $comments / abs($score) );
        }
        elsif ( $score == 0 and $comments > $ratio_limit ) {
            $entry->{ratio} = 100;
        }

        # only Hacker News entries carry a rank
        next unless $entry->{tag} eq 'hn';
        my $rank = $ranks{ $entry->{id} } or next;
        $entry->{rank} = $rank;
    }
}

if ($debug) {
    say gmtime . " got all scores... ";
}
$generation_log .= sec_to_dhms( tv_interval($t0) ) . ' - got all scores and done after <br />';
# Render templates/hourly.tt with the collected pairs and some metadata and
# write the result to the public index page.

# DateTime is called below but this file never loads it; load it here instead
# of depending on some other module having pulled it in already.
require DateTime;

# take a fresh timestamp so the page shows when generation finished
$now = time();
my $dt_now =
  DateTime->from_epoch( epoch => $now, time_zone => 'Europe/Stockholm' );

my %data = (
    pairs => \@pairs,
    meta  => {
        generate_time      => $dt_now->strftime('%Y-%m-%d %H:%M:%S%z'),
        page_title         => $page_title,
        no_of_days_to_show => $no_of_days_to_show,
        ratio_limit        => $ratio_limit,
        generation_log     => $generation_log,
    },
);

# Template->new returns false on a bad configuration; report that error
# directly rather than failing later on a method call on undef.
my $tt =
  Template->new( { INCLUDE_PATH => "$Bin/templates", ENCODING => 'UTF-8' } )
  or die Template->error;

$tt->process(
    'hourly.tt', \%data,
    '/home/gustaf/public_html/hnlo/index.html',
    { binmode => ':utf8' }
) or die $tt->error;

say gmtime . " generated page, done. " if $debug;