#!/usr/bin/perl -w
# Source: http://twiki.org/p/pub/Codev/AppletBasedEditor/html2wiki.pl
#
# html2wiki - convert HTML pages to PhpWiki markup.
#
# The package below subclasses HTML::Formatter and overrides the per-tag
# handlers and the low-level output primitives.  When the file is run as a
# script named html2wiki (and not as a CGI) it converts every file named on
# the command line.  See the POD at the end of the file for usage.

package HTML::FormatterPhpwiki;

use strict;
use integer;
use vars qw(@ISA $VERSION %opt);

use Getopt::Long;
use File::Spec::Functions;
use File::Basename;
use Pod::Usage;

$VERSION = sprintf("%d.%02d", q$Revision: 1.1 $ =~ /(\d+)\.(\d+)/);

require HTML::Formatter;
require HTML::Parse;
@ISA = qw(HTML::Formatter);

# ----------------------------------------------------------------------
# Tag handlers.
#
# Each handler receives the formatter as its first argument and the element
# node as its second (header_* additionally get the heading level in
# between).  NOTE(review): the TAG_start/TAG_end naming and the meaning of
# the return value are conventions of HTML::Formatter, which is not visible
# here -- confirm against the installed version.
# ----------------------------------------------------------------------

# Concatenate the text found below $item.  $item is either a plain string
# (returned as is) or an element object whose content() is walked
# recursively.
sub _text_of {
    my ($item) = @_;
    return '' unless defined $item;
    return $item unless ref $item;
    return join '', map { _text_of($_) } @{ $item->content || [] };
}

# <img src="...">  ->  [src]
# Images without a src attribute produce no output.
sub img_start {
    my ($self, $node) = @_;
    my $src = $node->attr('src');
    # Pass ONE string: the markup must reach the output as a unit.
    $self->out("[$src]") if defined $src && length $src;
    1;
}

# </a>  ->  [label|href]   (or [href] when the link has no text)
# Anchors without an href (e.g. <a name="...">) emit nothing extra.
# NOTE(review): a_start is not overridden in this file, so the link text is
# presumably also emitted by the normal text flow before this runs, which
# would show the label twice -- confirm against HTML::Formatter.
sub a_end {
    my ($self, $node) = @_;
    my $href = $node->attr('href');
    if (defined $href && length $href) {
        my $label = _text_of($node);
        # Collapse runs of whitespace so the link stays on one line.
        $label =~ s/\s+/ /g;
        $label =~ s/^ | $//g;
        $self->out(length $label ? "[$label|$href]" : "[$href]");
    }
    $self->{anchor}--;
    1;
}

# <b> ... </b>  ->  __ ... __
sub b_start { $_[0]->{bold}++; $_[0]->collect("__"); 1; }
sub b_end   { $_[0]->{bold}--; $_[0]->collect("__"); 1; }

# <i> ... </i>  ->  '' ... ''   (PhpWiki emphasis is two single quotes)
sub i_start { $_[0]->{italic}++; $_[0]->collect("''"); 1; }
# The end handler must undo the increment done by i_start.
sub i_end   { $_[0]->{italic}--; $_[0]->collect("''"); 1; }

# Start of a heading: begin a new line and emit one "!" per heading level,
# followed by a space.  Levels above 4 get no prefix at all.
# NOTE(review): PhpWiki's largest heading is "!!!", so mapping <h1> to a
# single "!" may be inverted -- confirm the intended mapping.
sub header_start {
    my ($self, $level) = @_;
    $self->nl;
    $self->{maxpos} = 0;
    if ($level <= 4) {
        $self->out(("!" x $level) . " ");
    }
    1;
}

# End of a heading: request vertical space.
# NOTE(review): out() below discards pending vspace without emitting it.
sub header_end {
    my ($self) = @_;
    $self->vspace(1);
    1;
}

# <hr>  ->  ----
sub hr_start {
    my $self = shift;
    $self->vspace(1);
    $self->out('----');
    $self->vspace(1);
    1;
}

# ----------------------------------------------------------------------
# Output primitives.
# ----------------------------------------------------------------------

# Emit text.  All arguments are concatenated (undefined ones are skipped).
# Empty or whitespace-only text produces a newline instead.  Pending
# vertical space is discarded, pending horizontal space becomes a single
# blank.  The commented-out lines are the line-wrapping logic that was
# deliberately disabled.
sub out {
    my ($self, @parts) = @_;
    my $text = join '', grep { defined } @parts;

    if ($text =~ /^\s*$/) {
        $self->nl;
        return;
    }

    if (defined $self->{vspace}) {
        # if ($self->{out}) {
        #     $self->nl;  # while $self->{vspace}-- >= 0;
        # }
        # $self->goto_lm;
        $self->{vspace} = undef;
        $self->{hspace} = 0;
    }

    if ($self->{hspace}) {
        # if ($self->{curpos} + length($text) > $self->{rm}) {
        #     # word will not fit on line; do a line break
        #     $self->nl;
        #     $self->goto_lm;
        # } else {
        #     # word fits on line; use a space
        $self->collect(' ');
        ++$self->{curpos};
        # }
        $self->{hspace} = 0;
    }

    $self->collect($text);
    $self->{curpos} += length $text;
    # $self->{maxpos} = $pos if $self->{maxpos} < $pos;
    $self->{'out'}++;
}

# Emit a newline and reset the column counter.
sub nl {
    my $self = shift;
    $self->{'out'}++;
    $self->{curpos} = 0;
    $self->collect("\n");
}

# Move the column counter up to the left margin.  Only the counter changes;
# the padding output is disabled.
sub goto_lm {
    my $self = shift;
    my $pos  = $self->{curpos};
    my $lm   = $self->{lm};
    if ($pos < $lm) {
        $self->{curpos} = $lm;
        # $self->collect(" " x ($lm - $pos));
    }
}

# Shift the left margin by the given (possibly negative) amount.
sub adjust_lm {
    my $self = shift;
    $self->{lm} += $_[0];
    # $self->goto_lm;
}

# Shift the right margin by the given (possibly negative) amount.
sub adjust_rm {
    shift->{rm} += $_[0];
}

# Emit preformatted text: every line is prefixed with one blank.
# Pending vertical space is flushed as newlines, but only when something
# has been output already.
sub pre_out {
    my $self = shift;
    # should really handle bold/italic etc.
    if (defined $self->{vspace}) {
        if ($self->{out}) {
            $self->nl() while $self->{vspace}-- >= 0;
            $self->{vspace} = undef;
        }
    }
    my $indent = ' ';    # x $self->{lm};
    my $pre    = shift;
    $pre =~ s/^/$indent/mg;
    $self->collect($pre);
    $self->{out}++;
}

# ----------------------------------------------------------------------
# Command line driver.
# ----------------------------------------------------------------------

# Convert every file in @ARGV.  The output file is named after the input
# file without its extension and with the first letter upper-cased; it is
# written to $opt{dir} (default: the current directory).
# Dies when input and output would be the same file, when the input cannot
# be parsed, or when the output cannot be written.
sub main {
    my $dir = (defined $opt{dir} && length $opt{dir}) ? $opt{dir} : '.';

    foreach my $in (@ARGV) {
        my $page = basename($in);
        $page =~ s/\.[^\.]*$//;

        # catfile() keeps the whole directory; dirname() would drop its
        # last component.
        my $out = catfile($dir, ucfirst $page);
        die "input file same as output file: $in\n"
            if canonpath($in) eq canonpath($out);

        # Parse first so that a bad input does not leave an empty page.
        my $html = HTML::Parse::parse_htmlfile($in)
            or die "cannot read $in: $!\n";

        my $formatter = HTML::FormatterPhpwiki->new(lm => 0);
        my $wiki      = $formatter->format($html);

        open my $fh, '>', $out or die "cannot write $out: $!\n";
        print {$fh} $wiki      or die "cannot write $out: $!\n";
        close $fh              or die "cannot close $out: $!\n";

        $html->delete;    # release the parse tree
    }
}

# TODO: be more CGI friendly
# bundling_override keeps -vq style bundles but lets the documented
# single-dash long form (-dir) work as well.
Getopt::Long::Configure("bundling_override");
unless ($ENV{'REQUEST_METHOD'} or $0 !~ /html2wiki/) {
    GetOptions(\%opt, 'v|verbose', 'q|quiet', 'help|?', 'dir=s')
        or pod2usage(1);
    pod2usage(-verbose => 2) if exists $opt{help};
    main();
}

1;

__END__

=pod

=head1 NAME

html2wiki - PhpWiki converter

=head1 DESCRIPTION

html2wiki [options] [files...]

Converts html pages to phpwiki format.

=head1 SYNOPSIS

  # create Myfile
  html2wiki myfile.html

  # create files in the wiki source directory
  html2wiki -dir /wiki/pgsrc *.html

=head1 OPTIONS

=over 4

=item B<-dir> <arg> or B<--dir> <arg>

Output directory

=item B<-v> or B<--verbose>

Accepted, currently without effect.

=item B<-q> or B<--quiet>

Accepted, currently without effect.

=back

=head1 COPYRIGHT

Written and Copyright 2001 by Reini Urban.
Same usage restrictions and warranties as perl itself,
i.e. the Perl Artistic License or GNU GPL.

=head1 TODO

B<-R> Recursive

=cut