2 # Copyright (C) 2007 Reini Urban
4 # This program is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License as
6 # published by the Free Software Foundation; either version 2 of the
7 # License, or (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along
15 # with this program; if not, write to the Free Software Foundation, Inc.,
16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 wiki2omega [-u URL] WIKIDB
28 The first argument may be a phpwiki URL, a directory, or a single file.
29 If it is a URL, the -u (--url) option is optional; otherwise it is mandatory.
33 The URL prefix stored with each page in the Xapian database, so that a search result can link back to the wiki page.
37 Prints the manual page and exits.
43 Convert a phpwiki database into the input format understood by Xapian's scriptindex,
44 in preparation for building a full-text index.
48 # serial dump (see config.ini for DEFAULT_DUMP_DIR)
49 DEFAULT_DUMP_DIR = /var/www/wikidb/pgsrc
50 lwp-request -d http://localhost/phpwiki/?action=dumpserial
51 wiki2omega -u wiki $DEFAULT_DUMP_DIR | \
52 scriptindex /var/lib/omega/data/wiki /usr/share/omega/wiki2omega.script
55 wiki2omega http://localhost/phpwiki/ | \
56 scriptindex /var/lib/omega/data/wiki /usr/share/omega/wiki2omega.script
59 wiki2omega -u wiki /tmp/wikidump/HomePage | \
60 scriptindex /var/lib/omega/data/wiki /usr/share/omega/wiki2omega.script
63 DEFAULT_DUMP_DIR = /var/www/wikidb/pgsrc
64 nice /usr/bin/lwp-request -P -d -m GET "http://localhost/wiki/?action=dumpserial"
65 nice wiki2omega -u /wiki $DEFAULT_DUMP_DIR | \
66 scriptindex /var/lib/omega/data/wiki /var/lib/omega/scripts/wiki2index.script \
67 > /var/log/omega/updateindex-wiki.log
74 use Digest::MD5 qw(md5_hex);
75 use constant AZ_OK => 0;
# Parse the command-line switches: -help / -? sets $help, and -url / -u takes
# a string argument that is stored in $wikiurl.
82 GetOptions('help|?' => \$help,
84 'url|u=s' => \$wikiurl,
# Print a brief usage message and exit when help was requested.
86 pod2usage(1) if $help;
# -verbose => 2 asks Pod::Usage for the complete manual page when $man is true.
87 pod2usage(-exitstatus => 0, -verbose => 2) if $man;
# Replace every %XX escape in $s with the character whose code is the hex value XX.
# NOTE(review): the character class lists only A-F, so escapes written with
# lowercase hex digits (e.g. %2f) are left undecoded -- confirm that the dump
# always emits uppercase escapes, or add a-f / the /i flag.
91 $s =~ s/%([A-F0-9][A-F0-9])/chr(hex($1))/eg;
# Turn one page ($content) into key=value records printed on STDOUT.
# md5, size and language are derived here; the other %hdr fields are picked
# out of the page text by the header-matching chain further down.
# Nothing is emitted for an empty (or otherwise false) $content.
101 return unless $content;
102 $hdr{md5} = md5_hex($content);
103 $hdr{size} = length($content);
# The language field is hard-coded, not detected from the page.
104 $hdr{language} = 'english';
# @_ is reused as a queue of the page's lines.
105 @_ = split(/\n/, $content);
# NOTE(review): the loop condition tests the truth of the shifted line, so the
# loop also ends at the first empty line or at a line consisting of "0" --
# confirm this is the intended terminator rather than "queue exhausted".
106 LINE: while ($_ = shift @_) {
110 # headers finished, dump them
111 my $title = $hdr{title};
# The url record is the -u prefix, a slash, and the page title.
# NOTE(review): $title is the urldecoded pagename and is inserted as-is --
# confirm whether it needs to be URL-escaped again for the link.
112 print "url=$wikiurl/$title\n";
# keys() returns the fields in no particular order, so the order of the
# emitted records can differ between runs.
113 for my $h (keys %hdr) {
114 print "$h=$hdr{$h}\n";
120 # ignore continuation lines
122 #while ($_ = shift @_) {
124 # last unless /^[ \t]/;
# Header recognition: each substitution strips the field name in place, leaving
# the value in $line; empty values are not stored.
# "Date:" is matched case-insensitively, with surrounding whitespace trimmed.
127 if ($line =~ s/^Date:\s*(.*?)\s*$/$1/i) {
128 $hdr{date} = $line if length $line;
129 #print "date=$line\n" if length $line;
# NOTE(review): the " name=value;" patterns below are anchored only at the end
# of the line, so any text preceding the matched part stays in $line -- verify
# against the indentation used in the dump files.
130 } elsif ($line =~ s/ pagename=(.*?);$/$1/) {
131 $hdr{title} = urldecode($line) if length $line;
132 } elsif ($line =~ s/ author=(.*?);$/$1/) {
133 $hdr{author} = urldecode($line) if length $line;
134 } elsif ($line =~ s/ lastmodified=(.*?);$/$1/) {
# lastmodified is stored twice: as lastmod and as date, replacing any date
# value that was set earlier from a "Date:" line.
135 $hdr{lastmod} = $line if length $line;
136 $hdr{date} = $line if length $line;
139 # the rest is the content:
# Matches a line that starts (after optional whitespace) with one or more "!"
# and captures the text that follows -- presumably wiki heading markup.
141 if (/^\s*\!+(.+)$/) {
# NOTE(review): this format string contains no "\n"; presumably $headers
# already ends with a newline -- confirm, otherwise the next record is glued on.
147 print "headers=$headers" if $headers;
# Argument handling. The first form uses the next argument as both the wiki
# URL and the database (presumably the branch taken when -u was not given);
# the second form only takes the database. Either dies with a syntax hint when
# the argument is missing or false.
151 $wikiurl = $db = shift or die "Syntax: $0 WIKIURL\n";
153 $db = shift or die "Syntax: $0 -u WIKIURL DATABASE\n";
# Remote wiki: any argument starting with "http" is fetched as a zip dump.
155 if ($db =~ /^http/) {
# NOTE(review): $db is interpolated into a shell command line; inside the
# double quotes the shell still expands $(...), backticks and $VAR, so an
# untrusted argument can run commands. The exit status of wget is not checked,
# and the download goes to a fixed, predictable path in /tmp.
156 `wget -nv -O/tmp/wikidb.zip "$db?action=zip"`;
157 $db = "/tmp/wikidb.zip";
# Zip archive (downloaded above or given directly): read every non-directory
# member as one page.
160 if ($db =~ /\.zip$/i) {
# Archive::Zip is loaded lazily, only when a zip file is processed.
# NOTE(review): $@ is not checked after this string eval, so a missing module
# only shows up as a failure on the Archive::Zip->new call below.
161 eval "require Archive::Zip;";
162 my $zip = Archive::Zip->new();
# AZ_OK is the constant defined with "use constant" near the top of this file.
# The error text always says wikidb.zip, whatever $db actually names.
163 die 'wikidb.zip read error' unless $zip->read( $db ) == AZ_OK;
164 foreach my $member ($zip->members()) {
165 unless ($member->isDirectory()) {
166 my $page = $zip->contents($member);
# Plain files: process each file name remaining in @_.
174 while (my $filename = shift @_) {
# NOTE(review): two-argument open on the bareword handle IN, with no check of
# the return value in this statement -- a failed open is not reported here.
# A three-argument open with a lexical handle and "or die" would be more robust.
175 open IN, "< $filename";
# Fallback when the argument matched none of the handled forms.
189 die "invalid argument";