forked from kragen/knuth-interview-2006
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetch
executable file
·51 lines (42 loc) · 1.34 KB
/
fetch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/perl -w
use strict;
use LWP::Simple qw(get);

=head1 NAME

fetch - converts the Knuth interview from Web of Stories into Markdown

=head1 DESCRIPTION

Run it; it hits the site and outputs Markdown. You could probably use
it to get Markdown transcripts from other Web of Stories interviews
too if you change C<@ids>.

=cut

# Web of Stories story IDs to fetch; one page is requested per ID.
my @ids = (17060..17156);

# For each ID: download the page, pull out the title and the transcript
# text, and print them to STDOUT as a Markdown section (a setext heading
# that links back to the page, followed by the transcript).  Pages that
# cannot be fetched or parsed are reported on STDERR and skipped.
for my $id (@ids) {
    my $url = "http://webofstories.com/play/$id";

    # LWP::Simple's get returns undef when the request fails; skip the
    # page instead of splitting undef and printing an empty entry.
    my $page = get($url);
    if (not defined $page) {
        warn "fetch: could not retrieve $url; skipping\n";
        next;
    }

    my @lines = split /\n/, $page;
    my ($title, $body);
    for my $i (0..$#lines) {
        # The title is the line following the first line containing <h3>;
        # the bounds check avoids reading past the end of the page.
        if (not defined $title and $i < $#lines and $lines[$i] =~ /<h3>/) {
            $title = $lines[$i+1];
            for ($title) { s/\s+/ /g; s/^\s*//; s/\s*$// }
        }

        next unless $lines[$i] =~ /<div class="transcriptText">/;

        # Usually it's a single line, but in e.g.
        # <http://webofstories.com/play/17066> it's actually multiple
        # paragraphs (missing the <p> tags in HTML!)
        for my $endline ($i..$#lines) {
            next unless $lines[$endline] =~ m|</div>|;
            # Everything strictly between the opening line and the first
            # line containing </div>.  Newlines are kept; runs of other
            # whitespace are collapsed to a single space.
            my $text = join "\n", @lines[$i+1..$endline-1];
            for ($text) { s/[\t\r ]+/ /g; s/^\s*//; s/\s*$// }
            $body = $text;
            last;
        }
        # If no closing </div> was found, $body is left untouched (undef
        # unless an earlier transcript div already supplied one).
    }

    if (not defined $title or not defined $body) {
        warn "fetch: no title or transcript found at $url; skipping\n";
        next;
    }

    # Setext underline as wide as the "[title](url)" heading line:
    # the two strings plus the four bracket/paren characters.
    my $underline = "-" x (length($title) + length($url) + 4);

    # The trailing blank line ends the body paragraph so that it is not
    # absorbed into the next entry's setext heading.
    print <<EOF;
[$title]($url)
$underline
$body

EOF
}
continue {
    # Pause between requests, including after pages that were skipped.
    sleep 1;
}