Phpwiki 

http://twiki.org/p/pub/Codev/AppletBasedEditor/html2wiki.pl

#!/usr/bin/perl -w

package HTML::FormatterPhpwiki;
use strict;
use integer;
use vars qw(@ISA $VERSION %opt);
use Getopt::Long;
use File::Spec::Functions;
use File::Basename;
use Pod::Usage;

# Module version, derived from the "Revision" keyword string: the regex
# captures the major and minor numbers and sprintf renders them as
# "major.minor" with the minor part zero-padded to two digits.
$VERSION = sprintf("%d.%02d", q$Revision: 1.1 $ =~ /(\d+)\.(\d+)/);

# Loaded with require (run time), so neither module imports anything here;
# HTML::Parse is called below through its fully qualified name.
require HTML::Formatter;
require HTML::Parse;
# Inherit from HTML::Formatter; the subs in this package are looked up
# before the inherited ones.
@ISA=qw(HTML::Formatter);

# Start-tag handler for <img>: emit the image URL in PhpWiki's bracketed
# form, "[src]".
#   $_[0]  the formatter object
#   $_[1]  the element node being formatted (must provide attr())
# The pieces are concatenated into one string because this package's out()
# only uses its first text argument; handing them over as a list emitted
# nothing but "[".  An image without a src attribute produces no output.
sub img_start {
    my ($self, $node) = @_;

    my $src = $node->attr('src');
    return unless defined $src && length $src;

    $self->out("[" . $src . "]");
}
# End-tag handler for <a>: emit the link in PhpWiki's "[label|url]" form
# and decrement the formatter's anchor counter.
#   $_[0]  the formatter object
#   $_[1]  the anchor element node (must provide attr())
# Returns the value of the anchor counter before the decrement.
sub a_end {
    my ($self, $node) = @_;

    # The node's children may come back as an array reference (which is
    # how HTML::Element keeps them under '_content') or as a plain value;
    # joining the reference itself would yield "ARRAY(0x...)".
    my $content = $node->attr('_content');
    my @parts = ref($content) eq 'ARRAY' ? @$content
              : defined $content         ? ($content)
              :                            ();

    my $label = '';
    for my $part (@parts) {
        next unless defined $part;
        if (ref $part) {
            # Nested element: take its text if it offers as_text(),
            # otherwise contribute nothing.
            local $@;
            my $text = eval { $part->as_text };
            $label .= $text if defined $text;
        }
        else {
            $label .= $part;
        }
    }

    my $href = $node->attr('href');
    $href = '' unless defined $href;

    # One concatenated string: this package's out() ignores every
    # argument after the first.
    $self->out("[" . $label . "|" . $href . "]");
    $self->{anchor}--;
}
# Start-tag handler for <b>: count one more level of bold nesting and
# append the "__" marker to the collected output.  Always returns 1.
sub b_start {
    my $self = shift;
    ++$self->{bold};
    $self->collect('__');
    return 1;
}
# End-tag handler for <b>: drop one level of bold nesting and append the
# closing "__" marker to the collected output.  Always returns 1.
sub b_end {
    my $self = shift;
    --$self->{bold};
    $self->collect('__');
    return 1;
}
# Start-tag handler for <i>: count one more level of italic nesting and
# append the opening emphasis marker.  Always returns 1.
#   $_[0]  the formatter object
sub i_start {
    my $self = shift;
    $self->{italic}++;
    # PhpWiki emphasis is written with TWO apostrophes (''text''); a
    # single apostrophe would come out as a literal quote character.
    $self->collect("''");
    return 1;
}
# End-tag handler for <i>: drop one level of italic nesting and append the
# closing emphasis marker.  Always returns 1.
#   $_[0]  the formatter object
sub i_end {
    my $self = shift;
    # Closing a tag must undo i_start's increment (as b_end does for
    # bold); incrementing here left the counter growing forever.
    $self->{italic}--;
    # PhpWiki emphasis is written with TWO apostrophes (''text'').
    $self->collect("''");
    return 1;
}

# Start handler shared by the heading tags: begin a new line and emit the
# PhpWiki heading prefix for the given HTML heading level.
#   $self   the formatter object
#   $level  HTML heading level (1 for <h1>, 2 for <h2>, ...)
#   $node   the heading element (unused)
# Always returns 1.
sub header_start {
    my($self, $level, $node) = @_;
    $self->nl;
    $self->{maxpos} = 0;

    # PhpWiki knows three heading sizes and MORE bangs mean a LARGER
    # heading: "!!!" large, "!!" medium, "!" small.  So <h1> maps to
    # "!!!", <h2> to "!!", and <h3> and anything deeper to "!".
    my $bangs = $level <= 1 ? 3
              : $level == 2 ? 2
              :               1;
    $self->out(("!" x $bangs) . " ");
    1;
}
# End handler shared by the heading tags: request one unit of vertical
# space after the heading.  The level and node arguments are accepted but
# not used.  Always returns 1.
sub header_end {
    my $self = shift;
    $self->vspace(1);
    return 1;
}
# Start-tag handler for <hr>: emit a "----" rule with one unit of vertical
# space requested before and after it.  Returns whatever the final
# vspace() call returns.
sub hr_start {
    my ($self) = @_;
    $self->vspace(1);
    $self->out('----');
    return $self->vspace(1);
}

# Emit one piece of text.
#   $self  the formatter object
#   $text  the text to emit; only this first text argument is used
# Whitespace-only text is turned into a line break.  Otherwise a pending
# vertical-space request is dropped (together with any pending horizontal
# space), a pending horizontal space becomes a single blank, and the text
# is collected.  Line wrapping against the right margin and indentation to
# the left margin are intentionally not performed here.
sub out {
    my ($self, $text) = @_;

    # Nothing but whitespace: just break the line.
    if ($text !~ /\S/) {
        $self->nl;
        return;
    }

    # A pending vertical-space request is discarded rather than rendered,
    # and it cancels any pending horizontal space as well.
    @{$self}{qw(vspace hspace)} = (undef, 0)
        if defined $self->{vspace};

    # A pending horizontal space always becomes one blank (no wrapping).
    if ($self->{hspace}) {
        $self->collect(' ');
        $self->{curpos}++;
        $self->{hspace} = 0;
    }

    $self->collect($text);
    $self->{curpos} += length $text;
    $self->{'out'}++;
}
# Emit a line break: reset the current column to 0, count the output, and
# collect a newline.  Returns whatever collect() returns.
sub nl {
    my ($self) = @_;
    $self->{curpos} = 0;
    $self->{'out'}++;
    $self->collect("\n");
}
# Move the recorded column up to the left margin when it is currently left
# of it.  Only the bookkeeping value {curpos} changes; no padding is
# written to the output.
sub goto_lm {
    my ($self) = @_;
    my ($column, $margin) = @{$self}{qw(curpos lm)};
    $self->{curpos} = $margin if $column < $margin;
}
# Shift the left margin by the given (possibly negative) amount and return
# the new margin.  The current column is not moved.
sub adjust_lm {
    my ($self, $delta) = @_;
    $self->{lm} += $delta;
}
# Shift the right margin by the given (possibly negative) amount and
# return the new margin.
sub adjust_rm {
    my ($self, $delta) = @_;
    $self->{rm} += $delta;
}
# Emit preformatted text.
#   $self  the formatter object
#   $pre   the preformatted text
# If vertical space is pending and something has already been output, the
# pending space is rendered as line breaks first.  Every line of the text
# is then prefixed with two blanks (a fixed indent, independent of the
# left margin) and collected.  Bold/italic state is not taken into account.
sub pre_out {
    my ($self, $pre) = @_;

    if (defined $self->{vspace} && $self->{out}) {
        while ($self->{vspace}-- >= 0) {
            $self->nl();
        }
        $self->{vspace} = undef;
    }

    $pre =~ s/^/  /mg;
    $self->collect($pre);
    $self->{out}++;
}

# Convert every HTML file named in @ARGV to a PhpWiki page file.
#
# For each input file the output name is the input's basename without its
# last extension and with the first letter upper-cased; it is written to
# the directory given by the "dir" option (default: current directory).
# Dies if the output would overwrite the input, if the input cannot be
# parsed, or if the output cannot be written.
sub main {
  foreach my $in (@ARGV) {
    my $f = basename($in);
    $f =~ s/\.[^\.]*$//;
    # Use the requested directory itself; taking dirname() of it dropped
    # its last component (-dir /wiki/pgsrc wrote into /wiki).
    my $out = catfile($opt{dir} || ".", ucfirst $f);
    die "input file same as output file"
      if canonpath($in) eq canonpath($out);

    # Parse and format before touching the output file, so a failure
    # here does not leave an empty or truncated page behind.
    my $html = HTML::Parse::parse_htmlfile($in)
      or die "cannot parse $in: $!";
    my $p = HTML::FormatterPhpwiki->new(lm => 0);
    my $wiki = $p->format($html);

    # Three-argument open with a lexical handle: the file name can never
    # be mistaken for a mode, and every I/O step is checked.
    open my $fh, '>', $out or die "cannot write $out: $!";
    print {$fh} $wiki      or die "cannot write $out: $!";
    close $fh              or die "cannot close $out: $!";
  }
}

# Command-line entry point.  Options are parsed and main() is run only
# when the program was started under a name containing "html2wiki" and not
# from a CGI environment (REQUEST_METHOD set); otherwise the file merely
# defines the formatter package.
# TODO: be more CGI friendly
#
# "bundling_override" keeps single-letter bundling (-vq) but lets long
# option names win over bundles, so the documented "-dir <arg>" form is
# accepted; with plain "bundling" it was parsed as the bundle -d -i -r and
# rejected.
Getopt::Long::Configure ("bundling_override");
unless ($ENV{'REQUEST_METHOD'} or $0 !~ /html2wiki/) {
  GetOptions (\%opt,'v|verbose','q|quiet','help|?',
         'dir=s') or pod2usage(1);
  # --help prints the full POD and exits.
  pod2usage(-verbose => 2) if exists $opt{help};
  main();
}

1;
__END__

=pod

=head1 NAME

html2wiki - PhpWiki converter

=head1 DESCRIPTION

  html2wiki [options] [files...]

Converts html pages to phpwiki format.

=head1 SYNOPSIS

  # create myfile
  html2wiki myfile.html

  # create files in the wiki source directory
  html2wiki -dir /wiki/pgsrc *.html

=head1 OPTIONS

=over 4

=item B<-dir> <arg>

Output directory

=item B<-v> or B<--verbose>

Accepted, but currently has no effect.

=item B<-q> or B<--quiet>

Accepted, but currently has no effect.

=back

=head1 COPYRIGHT

Written and Copyright 2001 by Reini Urban.
Same usage restrictions and warranties as perl itself,
i.e. the Perl Artistic License or GNU GPL.

=head1 TODO

B<-R> Recursive

=cut