Someone asked for an example, so I dug up my jspwiki tool. Here are the guts of the parser:
# Build a tiny cursor-based scanner over a string.
#
# Takes the text to scan and returns nine closures that share one buffer and
# one cursor position, in this order:
#   ($setData, $start, $fwd, $bck, $fwdTo, $fwdPast, $bckTo, $btwn, $all)
#
# A cursor value of -1 means "failed / ran off the data". Once the cursor is
# -1, every movement closure returns -1 without moving until $start or
# $setData rewinds it to 0.
sub makeParser {
    my ($data) = @_;
    my $pos = 0;

    # Replace the buffer with the given string and rewind the cursor.
    my $setData = sub { $data = $_[0]; $pos = 0; };

    # Rewind the cursor to the beginning of the buffer.
    my $start = sub { $pos = 0 };

    # Move forward by N characters; the cursor becomes -1 at or past the end.
    my $fwd = sub {
        return -1 if $pos == -1;
        $pos += $_[0];
        $pos = -1 if $pos >= length($data);
        return $pos;
    };

    # Move backward by N characters; the cursor becomes -1 before the start.
    my $bck = sub {
        return -1 if $pos == -1;
        $pos -= $_[0];
        $pos = -1 if $pos < 0;
        return $pos;
    };

    # Move backward to the start of the nearest occurrence of the given
    # string at or before the cursor (-1 when there is none).
    my $bckTo = sub {
        return -1 if $pos == -1;
        $pos = rindex $data, $_[0], $pos;
        return $pos;
    };

    # Move forward to the start of the next occurrence of the given string
    # at or after the cursor (-1 when there is none).
    my $fwdTo = sub {
        return -1 if $pos == -1;
        $pos = index $data, $_[0], $pos;
        return $pos;
    };

    # Move forward to just after the next occurrence of the given string.
    # The cursor becomes -1 when the string is missing or ends the buffer.
    my $fwdPast = sub {
        return $pos if $pos == -1;
        $pos = index $data, $_[0], $pos;
        return $pos if $pos == -1;
        $pos += length($_[0]);
        $pos = -1 if $pos >= length($data);
        return $pos;
    };

    # Return the text between the next occurrence of the start delimiter
    # ($_[0]) and the following end delimiter ($_[1]). The cursor is left on
    # the end delimiter. Returns undef when either delimiter is missing, and
    # -1 when the cursor was already dead on entry.
    my $btwn = sub {
        return -1 if $pos == -1;
        my $s = $fwdPast->($_[0]);
        return undef if $s == -1;    # explicit undef kept for existing callers
        my $e = $fwdTo->($_[1]);
        return undef if $e == -1;
        return substr $data, $s, $e - $s;
    };

    # Collect every remaining start/end delimited item from the cursor on.
    my $all = sub {
        my @all;
        while ($pos != -1) {
            my $before = $pos;
            my $item   = $btwn->(@_);

            # Only undef means "no more items"; an empty string or "0" is a
            # valid item and must not end the scan early.
            last unless defined $item;

            # An empty start delimiter can leave the cursor where it was;
            # stop instead of looping forever on the same spot.
            last if $pos <= $before;

            push @all, $item;
        }
        return @all;
    };

    return ($setData, $start, $fwd, $bck, $fwdTo, $fwdPast, $bckTo, $btwn, $all);
}
And here is how it gets used:
# Fetch the PageInfo page for a topic, echo the raw response, then print the
# rows of its version table as tab-separated lines under a fixed header.
sub getPageInfo {
    my ($topic) = @_;

    my $response = $UserAgent->get("$BaseURL/PageInfo.jsp?page=$topic");
    my $data     = $response->content;
    print "$data\n";

    # makeParser returns its closures positionally; slot 7 is the "between"
    # extractor and slot 8 is the "all items" collector.
    my $between = (makeParser($data))[7];
    my $table   = $between->('Version', '</table>');

    my $all_rows = (makeParser($table))[8];
    my @header   = qw(Version Date Author Size Changes from Previous);
    print join("\t", @header), "\n";

    for my $row ($all_rows->('<tr>', '</tr>')) {
        dumpRow($row);
    }
}
# Print one table row as a single tab-separated line: each <td>...</td> cell
# is extracted, passed through simpleStrip, and the results are joined.
sub dumpRow {
    my ($row) = @_;

    # Slot 8 of makeParser's return list is the "all items" collector.
    my $all_cells = (makeParser($row))[8];
    my @fields    = map { simpleStrip($_) } $all_cells->('<td>', '</td>');

    print join("\t", @fields), "\n";
}
|