# Source: http://twiki.org/p/pub/Codev/AppletBasedEditor/html2wiki.pl
#!/usr/bin/perl -w
package HTML::FormatterPhpwiki;
use strict;
use integer;
use vars qw(@ISA $VERSION %opt);
use Getopt::Long;
use File::Spec::Functions;
use File::Basename;
use Pod::Usage;
$VERSION = sprintf("%d.%02d", q$Revision: 1.1 $ =~ /(\d+)\.(\d+)/);
require HTML::Formatter;
require HTML::Parse;
@ISA=qw(HTML::Formatter);
# Handler for an <img> element: emits the image source as a bracketed
# link of the form "[src]".
#   $_[0]  the formatter object (must provide out())
#   $_[1]  the element node (queried through ->attr)
# The brackets and the source are concatenated into ONE string before the
# call, so the result does not depend on how many arguments out() consumes.
# An image without a src attribute produces no output at all.
# Returns whatever out() returns, or an empty return when src is missing.
sub img_start {
    my ($self, $node) = @_;
    my $src = $node->attr('src');
    return unless defined $src && length $src;
    return $self->out("[" . $src . "]");
}
# Handler for the end of an <a> element: emits the link in the form
# "[text|url]" (or "[url]" when there is no text) and then decrements
# $self->{anchor}.
#   $_[0]  the formatter object (must provide out())
#   $_[1]  the element node (queried through ->attr)
# The whole link is handed to out() as ONE string so that nothing is lost
# regardless of how many arguments out() consumes.
# An anchor without an href (e.g. a named target) emits nothing.
# NOTE(review): if the base formatter also emits the anchor's text while
# walking its children, the label will appear twice -- confirm against the
# inherited a_start/textflow behaviour.
sub a_end {
    my ($self, $node) = @_;

    # attr('_content') may yield plain strings, an array reference holding
    # the children, or nested element objects; flatten all of that to text.
    my @pending = $node->attr('_content');
    my $label   = '';
    while (@pending) {
        my $item = shift @pending;
        next unless defined $item;
        if (!ref $item) {
            $label .= $item;
        }
        elsif (ref $item eq 'ARRAY') {
            unshift @pending, @$item;
        }
        elsif (UNIVERSAL::can($item, 'as_text')) {
            $label .= $item->as_text;
        }
        # any other kind of reference carries no usable text: skipped
    }

    my $href = $node->attr('href');
    if (defined $href && length $href) {
        my $link = length $label ? $label . "|" . $href : $href;
        $self->out("[" . $link . "]");
    }

    $self->{anchor}--;
}
# Handler for the start of a <b> element: bumps the {bold} nesting
# counter and appends the "__" marker to the collected output.
# Always returns 1.
sub b_start {
    my ($self) = @_;
    ++$self->{bold};
    $self->collect("__");
    return 1;
}
# Handler for the end of a <b> element: lowers the {bold} nesting counter
# and appends the closing "__" marker to the collected output.
# Always returns 1.
sub b_end {
    my ($self) = @_;
    --$self->{bold};
    $self->collect("__");
    return 1;
}
# Handler for the start of an <i> element: bumps the {italic} nesting
# counter and appends the PhpWiki italic marker, which is TWO single
# quotes (''), to the collected output. Always returns 1.
sub i_start {
    my ($self) = @_;
    $self->{italic}++;
    $self->collect("''");
    return 1;
}
# Handler for the end of an <i> element: lowers the {italic} nesting
# counter (mirroring b_end for bold) and appends the closing '' marker.
# Always returns 1.
sub i_end {
    my ($self) = @_;
    $self->{italic}--;
    $self->collect("''");
    return 1;
}
# Handler for the start of a heading element.
#   $self   the formatter object (must provide nl() and out())
#   $level  heading level, 1 for <h1>, 2 for <h2>, ...
#   $node   the element node (unused)
# Starts a fresh line, resets {maxpos}, and for levels up to 4 emits the
# PhpWiki heading prefix. PhpWiki has only three heading sizes and uses
# MORE bangs for BIGGER headings ("!!!" large, "!!" medium, "!" small), so
# the mapping is h1 => "!!! ", h2 => "!! ", h3 and h4 => "! ".
# Levels above 4 get no prefix. Always returns 1.
sub header_start {
    my ($self, $level, $node) = @_;
    $self->nl;
    $self->{maxpos} = 0;
    if ($level <= 4) {
        # Invert the HTML level and clamp to the 1..3 bangs PhpWiki knows.
        my $bangs = 4 - $level;
        $bangs = 1 if $bangs < 1;
        $bangs = 3 if $bangs > 3;
        $self->out(("!" x $bangs) . " ");
    }
    return 1;
}
# Handler for the end of a heading element: requests one unit of vertical
# space via vspace(1). The level and node arguments that follow the
# formatter are accepted but not used. Always returns 1.
sub header_end {
    my $self = shift;
    $self->vspace(1);
    return 1;
}
# Handler for an <hr> element: emits the "----" rule, requesting one unit
# of vertical space both before and after it.
# Returns the result of the trailing vspace(1) call.
sub hr_start {
    my ($self) = @_;
    $self->vspace(1);
    $self->out('----');
    return $self->vspace(1);
}
# Appends text to the collected output.
#   $self  the formatter object (must provide nl() and collect())
#   @_     one or more text fragments; all defined fragments are joined
#          into a single string, so out("[", $src, "]") emits "[src]".
#          A single-argument call behaves exactly as before.
# Behaviour:
#   * empty or whitespace-only text (including undef) just ends the line
#     via nl() and returns nothing;
#   * a pending {vspace} request is cleared without emitting blank lines,
#     and it also cancels any pending {hspace};
#   * a pending {hspace} is emitted as one space;
#   * no line wrapping or left-margin padding is performed.
# {curpos} is advanced by the number of characters emitted and {out} is
# incremented. Returns the previous value of {out} for non-blank text.
sub out {
    my $self = shift;
    # Skip undefined fragments so they neither warn nor print.
    my $text = join '', grep { defined } @_;
    if ($text =~ /^\s*$/) {
        $self->nl;
        return;
    }
    if (defined $self->{vspace}) {
        $self->{vspace} = undef;
        $self->{hspace} = 0;
    }
    if ($self->{hspace}) {
        $self->collect(' ');
        ++$self->{curpos};
        $self->{hspace} = 0;
    }
    $self->collect($text);
    $self->{curpos} += length $text;
    $self->{'out'}++;
}
# Ends the current output line: resets {curpos} to 0, increments the {out}
# counter and appends a newline to the collected output.
# Returns whatever collect() returns.
sub nl {
    my ($self) = @_;
    $self->{curpos} = 0;
    ++$self->{'out'};
    return $self->collect("\n");
}
# Moves the recorded position {curpos} forward to the left margin {lm}
# when it is currently left of it. Only the bookkeeping changes: no
# padding is appended to the output.
sub goto_lm {
    my ($self) = @_;
    my $margin = $self->{lm};
    $self->{curpos} = $margin if $self->{curpos} < $margin;
}
# Shifts the left margin {lm} by the given amount (which may be negative).
# Only the margin is changed; the current position is left alone.
# Returns the new margin.
sub adjust_lm {
    my ($self, $delta) = @_;
    return $self->{lm} += $delta;
}
# Shifts the right margin {rm} by the given amount (which may be
# negative). Returns the new margin.
sub adjust_rm {
    my ($self, $delta) = @_;
    return $self->{rm} += $delta;
}
# Appends a chunk of preformatted text to the collected output, prefixing
# every line of it with a single space.
#   $self  the formatter object (must provide nl() and collect())
#   $pre   the preformatted text; the caller's string is not modified
# If vertical space is pending ({vspace} defined) and something has
# already been output ({out} true), nl() is called once per loop pass
# while the post-decremented {vspace} is >= 0, then {vspace} is cleared.
# Increments {out}; returns its previous value.
sub pre_out {
    my ($self, $pre) = @_;
    # should really handle bold/italic etc.
    if (defined $self->{vspace} && $self->{out}) {
        while ($self->{vspace}-- >= 0) {
            $self->nl();
        }
        $self->{vspace} = undef;
    }
    my $prefix = ' ';
    $pre =~ s/^/$prefix/mg;
    $self->collect($pre);
    $self->{out}++;
}
# Converts every file named in @ARGV from HTML to PhpWiki text.
# For each input the output file name is the input's base name with its
# last extension removed and its first letter upper-cased; it is written
# into the directory given by $opt{dir} (current directory by default).
# The directory option is used as given: taking its dirname() would drop
# the last path component and write into the parent directory instead.
# Dies when the output would overwrite the input, when the input cannot
# be parsed, or when the output cannot be opened, written or closed.
# The input is parsed and formatted BEFORE the output file is opened, so
# a failed conversion does not leave an empty output file behind.
sub main {
    my $dir = $opt{dir} || ".";
    foreach my $in (@ARGV) {
        my $f = basename($in);
        $f =~ s/\.[^\.]*$//;
        my $out = catfile($dir, ucfirst($f));
        die "input file same as output file"
            if canonpath($in) eq canonpath($out);

        my $html = HTML::Parse::parse_htmlfile($in);
        die "cannot parse $in: $!" unless defined $html;
        my $p    = HTML::FormatterPhpwiki->new(lm => 0);
        my $wiki = $p->format($html);

        open(my $fh, '>', $out) or die "cannot write $out: $!";
        print {$fh} $wiki       or die "cannot write $out: $!";
        close($fh)              or die "cannot close $out: $!";
    }
}
# TODO: be more CGI friendly
# Command-line entry point. Option bundling is enabled for Getopt::Long.
# The conversion runs only when REQUEST_METHOD is not set to a true value
# in the environment and the program name ($0) contains "html2wiki".
# A failed option parse prints usage via pod2usage(1); the help option
# prints the full manual via pod2usage(-verbose => 2).
Getopt::Long::Configure("bundling");
if (!$ENV{'REQUEST_METHOD'} and $0 =~ /html2wiki/) {
    GetOptions(\%opt, 'v|verbose', 'q|quiet', 'help|?', 'dir=s')
        or pod2usage(1);
    if (exists $opt{help}) {
        pod2usage(-verbose => 2);
    }
    main();
}
1;
__END__
=pod
=head1 NAME
html2wiki - PhpWiki converter
=head1 DESCRIPTION
html2wiki [options] [files...]
Converts html pages to phpwiki format.
=head1 SYNOPSIS
# convert myfile.html; the result is written to ./Myfile
html2wiki myfile.html
# create files in the wiki source directory
html2wiki --dir /wiki/pgsrc *.html
=head1 OPTIONS
=over 4
=item B<--dir> <arg>
Output directory
=item B<-v> or B<--verbose>
=item B<-q> or B<--quiet>
=back
=head1 COPYRIGHT
Written and Copyright 2001 by Reini Urban.
Same usage restrictions and warranties as Perl itself,
i.e. the Perl Artistic License or GNU GPL.
=head1 TODO
B<-R> Recursive
=cut