-
Notifications
You must be signed in to change notification settings - Fork 0
/
delicious2wp
executable file
·144 lines (129 loc) · 4.54 KB
/
delicious2wp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/perl -W
# Read export file from del.icio.us, extract HREF and convert to WordPress
# Elmar Klausmeier, 30-Aug-2013
# Elmar Klausmeier, 01-Jan-2015
use strict;
use POSIX qw(strftime);
use Getopt::Std;
# Command-line switches understood by getopts below:
#   -d          boolean flag (parsed here; not read anywhere else in this script)
#   -n <count>  stop after <count> links; 0 means "no limit"
#   -o <offset> added to the running link counter to form each wp:post_id
my %opts = (d => 0, n => 0, o => 0);
getopts('dn:o:', \%opts);

# Values that are not greater than zero collapse to 0.
my $n = 0;
$n = $opts{'n'} if $opts{'n'} > 0;
my $offset = 0;
$offset = $opts{'o'} if $opts{'o'} > 0;

# Per-link working state, shared by the conversion loop further down.
my $title    = "";    # anchor text of the current link
my $tags     = "";    # comma-separated TAGS attribute of the current link
my $taglines = "";    # <category>/<wp:...> lines emitted when the item is closed
my $prev     = "";    # previously built item header, for duplicate detection
my $out      = "";    # item header built for the current link

my @add_date;             # broken-down time taken from ADD_DATE
my $add_date_rfc = "";    # date string used for <pubDate>
my $add_date_ymd = "";    # date string used for <wp:post_date>

my $links   = 0;    # number of <DT> link lines seen so far
my $post_id = 0;    # $offset + $links
# Fixed preamble of the generated export: XML declaration, the <rss> root with
# its namespace declarations, the opening <channel> and the author record.
# The heredoc interpolates, hence the escaped '@' in the e-mail address.
my $wxr_preamble = <<"EOF";
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:excerpt="http://wordpress.org/export/1.2/excerpt/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:wp="http://wordpress.org/export/1.2/">
<channel>
<title>Collected Links</title>
<link>http://klmlinks.wordpress.com</link>
<language>de</language>
<wp:wxr_version>1.2</wp:wxr_version>
<wp:author>
<wp:author_login>eklausmeier</wp:author_login>
<wp:author_email>Elmar.Klausmeier\@gmail.com</wp:author_email>
<wp:author_display_name><![CDATA[eklausmeier]]></wp:author_display_name>
<wp:author_first_name><![CDATA[]]></wp:author_first_name>
<wp:author_last_name><![CDATA[]]></wp:author_last_name>
</wp:author>
EOF
print $wxr_preamble;
# Main conversion loop.
#
# Reads the bookmark export line by line (files named on the command line, or
# STDIN) and prints one <item> per line that starts with "<DT><A HREF=".
# Lines following such a link line are appended to the item's CDATA content:
# "<DD>" lines with the "<DD>" prefix removed, other lines followed by "<p>".
# Processing stops at the first line starting with "</DL>", or once more than
# $n links have been seen (when $n > 0).
#
# Uses the file-scoped state declared above: $n, $offset, $links, $post_id,
# $title, $tags, $taglines, $prev, $out, @add_date, $add_date_rfc, $add_date_ymd.

# Characters accepted inside a TAGS="..." attribute.  Needed twice per link
# line (capture the tag list, then strip the attribute), so compile it once.
my $tag_chars = qr/[äöüßÄÖÜÄÖÜäöüßéèùçø \w:\@\/\(\)\#=\-\+\!\.,\'\&]/;

# True while an <item> header has been printed and not yet closed.  A link
# whose header equals the previous one is dropped entirely, so its text lines
# and its closing tags must be suppressed as well; otherwise the output
# contains text outside any <item> and an unmatched </item>.
my $item_open = 0;

while (<>) {
    ($tags, @add_date) = ("", ());
    chomp;
    s/\s+$//;                   # rtrim

    # Rewrite selected byte sequences found in the export.
    # NOTE(review): the bytes produced by the non-ASCII replacement literals
    # depend on the encoding this script is saved in -- confirm it is UTF-8.
    s/\xC2\xAE/®/g;             # U+00AE registered sign (was rewritten to the copyright sign)
    s/\xE2\x84\xA2/™/g;         # trademark sign
    s/\xC3\xBC/ü/g;             # lowercase u-umlaut
    s/\xE2\x80\x99/'/g;         # right single quotation mark -> ASCII apostrophe
    s/\xC3\xA4/ä/g;             # lowercase a-umlaut
    s/\xE4\xA4/ä/g;             # presumably a damaged a-umlaut -- TODO confirm
    s/\xE4\xB6/ö/g;             # presumably a damaged o-umlaut -- TODO confirm
    s/\xC3\xB6/ö/g;             # lowercase o-umlaut
    s/\xC3\x9C/Ü/g;             # uppercase U-umlaut
    s/\xC3\x9F/ß/g;             # sharp s (eszett)
    s/\xC2\xA0//g;              # U+00A0 non-breaking space: removed
    s/\xC3\xA9/é/g;             # e acute
    s/\xE4\xA9/é/g;             # presumably a damaged e acute -- TODO confirm
    s/\xC3\xA8/è/g;             # e grave

    # A free-standing ampersand must be escaped: <title> is written outside
    # of CDATA, where a raw '&' makes the document ill-formed.
    s/ & / &amp; /g;

    if (/^<\/DL>/) {
        last;
    } elsif (/^<DT><A HREF=/) {
        # A new link starts: finish the item that is currently open, if any.
        if ($item_open) {
            print "]]></content:encoded>\n$taglines</item>\n";
            $item_open = 0;
        }
        $taglines = "";
        ++$links;               # have now seen the first <DT>, i.e., first link
        $post_id = $offset + $links;
        last if ($n > 0 && $links > $n);

        # Start from a clean slate so that a line without ADD_DATE or without
        # a recognisable anchor does not inherit the previous link's values.
        ($title, $add_date_rfc, $add_date_ymd) = ("", "", "");

        if (/ ADD_DATE="(\d+)"/) {
            my $epoch = $1;
            @add_date = localtime($epoch);
            # RSS requires an RFC 822 date; emit it in UTC rather than
            # labelling local time with a fixed "CET".
            $add_date_rfc = strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime($epoch));
            $add_date_ymd = strftime("%Y-%m-%d %H:%M:%S", @add_date);
        }

        s/^<DT>//;              # strip <DT>
        s/<A HREF=/<a href=/;   # lowercase the opening tag
        s/<\/A>$/<\/a>/;        # lowercase the closing tag
        s/ PRIVATE="0"//;       # drop useless stuff

        $tags  = $1 if (/TAGS="(${tag_chars}*)"/);
        $title = $1 if (/<a href=.+">(.*)<\/a>/);

        foreach my $tag (split(/,/, $tags)) {
            my $mtag = $tag;
            $mtag =~ s/\s+/\-/g;    # no space in nicename
            $taglines .= sprintf("<category domain=\"post_tag\" nicename=\"%s\"><![CDATA[%s]]></category>\n",
                lc($mtag), $tag);
        }
        $taglines .= "<wp:post_id>$post_id</wp:post_id>\n"
            . "<wp:post_date>$add_date_ymd</wp:post_date>\n"
            . "<wp:post_name>$title</wp:post_name>\n"
            . "<wp:status>publish</wp:status>\n"
            . "<wp:post_type>post</wp:post_type>\n"
            . "<category domain=\"post_format\" nicename=\"post-format-link\"><![CDATA[Link]]></category>\n"
            . "<category domain=\"category\" nicename=\"uncategorized\"><![CDATA[Uncategorized]]></category>\n";

        s/ TAGS="${tag_chars}*"//;  # drop TAGS, already captured above
        s/ ADD_DATE="\d+"//;        # drop ADD_DATE, as we have already processed this

        $out = "<item>\n"
            . "<title>$title</title>\n"
            . "<pubDate>$add_date_rfc</pubDate>\n"
            . "<dc:creator>eklausmeier</dc:creator>\n"
            . "<description/>\n"
            . "<content:encoded><![CDATA[$_\n";

        # Suppress a link whose header is identical to the previous one.
        if ($out ne $prev) {
            print $out;
            $item_open = 1;
        }
        $prev = $out;           # store current output, to later compare with previous
    } elsif ($item_open) {
        if (/^<DD>/) {
            # Strip off <DD> (4 chars): this is the text part
            print substr($_, 4);
        } else {
            print "$_<p>\n";    # actual text might span multiple lines
        }
    }
}

# Close the last item, unless the loop already closed it or it was suppressed.
print "]]></content:encoded>\n$taglines</item>\n" if ($item_open);
# Close the <channel> and <rss> elements opened by the preamble.
print "</channel>\n", "</rss>\n";
# Dead code: commented-out variants of the byte-sequence substitutions used in the main loop.
# s/\xC2\xAE//g; # copyright
# s/\xE2\x84\xA2//g; # trademark
# s/\xC3\xBC/ü/g; # klein ue
# s/\xE2\x80\x99/'/g; # apostroph
# s/\xC3\xA4/ä/g; # klein ae
# s/\xE4\xA4/ä/g; # klein ae
# s/\xE4\xB6/ö/g; # klein oe
# s/\xC3\xB6/ö/g; # klein oe
# s/\xC3\x9C/Ü/g; # gross Ue
# s/\xC3\x9F/ß/g; # szet
# s/\xC2\xA0//g; # backquote?
# s/\xC3\xA9/é/g; # e apostophe
# s/\xE4\xA9/é/g; # e apostophe
# s/\xC3\xA8/è/g; # e accent grave