first commit

This commit is contained in:
douboer
2025-09-17 16:08:16 +08:00
parent 9395faa6b2
commit 3ff47c11d5
1318 changed files with 117477 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
###
### DFSParser.pm
###
## Chapter 8 section 2.2
require "make-dfs-search";
sub make_parser_for_grammar {
my ($start, $grammar, $target) = @_;
my $is_nonterminal = sub {
my $symbol = shift;
exists $grammar->{$symbol};
};
my $is_interesting = sub {
my $sentential_form = shift;
my $i;
for ($i=0; $i < @$sentential_form; $i++) {
return 1 if $is_nonterminal->($sentential_form->[$i]);
return if $i > $#$target;
return if $sentential_form->[$i] ne $target->[$i];
}
return @$sentential_form == @$target ;
};
my $children = sub {
my $sentential_form = shift;
my $leftmost_nonterminal;
my @children;
for my $i (0 .. $#$sentential_form) {
if ($is_nonterminal->($sentential_form->[$i])) {
$leftmost_nonterminal = $i;
last;
} else {
return if $i > $#$target;
return if $target->[$i] ne $sentential_form->[$i];
}
}
return unless defined $leftmost_nonterminal; # no nonterminal symbols
for my $production (@{$grammar->{$sentential_form->[$leftmost_nonterminal]}}) {
my @child = @$sentential_form;
splice @child, $leftmost_nonterminal, 1, @$production;
push @children, \@child;
}
@children;
};
return sub {
make_dfs_search([$start], $children, $is_interesting
);
};
}
1;

View File

@@ -0,0 +1,110 @@
###
### Lexer.pm
###
## Chapter 8 section 1.1
package Lexer;
use base "Exporter";
@EXPORT_OK = qw(make_charstream blocks records tokens iterator_to_stream
make_lexer allinput);
%EXPORT_TAGS = ('all' => \@EXPORT_OK);
sub make_charstream {
my $fh = shift;
return sub { return getc($fh) };
}
## Chapter 8 section 1.1
sub records {
my $input = shift;
my $terminator = @_ ? shift : quotemeta($/);
my @records;
my @newrecs = split /($terminator)/, $input;
while (@newrecs > 2) {
push @records, shift(@newrecs).shift(@newrecs);
}
push @records, @newrecs;
return sub {
return shift @records;
}
}
## Chapter 8 section 1.3
sub allinput {
my $fh = shift;
my @data;
{ local $/;
$data[0] = <$fh>;
}
sub { return shift @data }
}
sub blocks {
my $fh = shift;
my $blocksize = shift || 8192;
sub {
return unless read $fh, my($block), $blocksize;
return $block;
}
}
## Chapter 8 section 1.3
sub tokens {
my ($input, $label, $pattern, $maketoken) = @_;
$maketoken ||= sub { [ $_[1], $_[0] ] };
my @tokens;
my $buf = ""; # set to undef to when input is exhausted
my $split = sub { split /($pattern)/, $_[0] };
sub {
while (@tokens == 0 && defined $buf) {
my $i = $input->();
if (ref $i) {
my ($sep, $tok) = $split->($buf);
$tok = $maketoken->($tok, $label) if defined $tok;
push @tokens, grep $_ ne "", $sep, $tok, $i;
$buf = "";
last;
}
$buf .= $i if defined $i;
my @newtoks = $split->($buf);
while (@newtoks > 2
|| @newtoks && ! defined $i) {
push @tokens, shift(@newtoks);
push @tokens, $maketoken->(shift(@newtoks), $label)
if @newtoks;
}
$buf = join "", @newtoks;
undef $buf if ! defined $i;
@tokens = grep $_ ne "", @tokens;
}
return shift(@tokens);
}
}
## Chapter 8 section 1.3
sub make_lexer {
my $lexer = shift;
while (@_) {
my $args = shift;
$lexer = tokens($lexer, @$args);
}
$lexer;
}
## Chapter 8 section 1.4
BEGIN { require 'iterator-to-stream.pl' }
1;

View File

@@ -0,0 +1,232 @@
###
### Parser.pm
###
## Chapter 8 section 3
package Parser;
use Stream ':all';
use base Exporter;
@EXPORT_OK = qw(parser nothing End_of_Input lookfor
alternate concatenate star list_of
operator T
error action test);
%EXPORT_TAGS = ('all' => \@EXPORT_OK);
sub parser (&); # Advance declaration - see below
## Chapter 8 section 3.1
sub nothing {
my $input = shift;
return (undef, $input);
}
sub End_of_Input {
my $input = shift;
defined($input) ? () : (undef, undef);
}
## Chapter 8 section 3.1
sub lookfor {
my $wanted = shift;
my $value = shift || sub { $_[0][1] };
my $u = shift;
$wanted = [$wanted] unless ref $wanted;
my $parser = parser {
my $input = shift;
return unless defined $input;
my $next = head($input);
for my $i (0 .. $#$wanted) {
next unless defined $wanted->[$i];
return unless $wanted->[$i] eq $next->[$i];
}
my $wanted_value = $value->($next, $u);
return ($wanted_value, tail($input));
};
return $parser;
}
## Chapter 8 section 3.1
sub parser (&) { $_[0] }
## Chapter 8 section 3.2
sub concatenate {
my @p = @_;
return \&nothing if @p == 0;
return $p[0] if @p == 1;
my $parser = parser {
my $input = shift;
my $v;
my @values;
for (@p) {
($v, $input) = $_->($input) or return;
push @values, $v;
}
return (\@values, $input);
}
}
## Chapter 8 section 3.2
sub alternate {
my @p = @_;
return parser { return () } if @p == 0;
return $p[0] if @p == 1;
my $parser = parser {
my $input = shift;
my ($v, $newinput);
for (@p) {
if (($v, $newinput) = $_->($input)) {
return ($v, $newinput);
}
}
return;
};
}
## Chapter 8 section 3.3
sub star {
my $p = shift;
my $p_star;
$p_star = alternate(concatenate($p, parser { $p_star->(@_) }),
\&nothing);
}
## Chapter 8 section 3.3
sub list_of {
my ($element, $separator) = @_;
$separator = lookfor('COMMA') unless defined $separator;
return concatenate($element,
star($separator, $element));
}
1;
## Chapter 8 section 4
sub T {
my ($parser, $transform) = @_;
return parser {
my $input = shift;
if (my ($value, $newinput) = $parser->($input)) {
$value = $transform->(@$value);
return ($value, $newinput);
} else {
return;
}
};
}
## Chapter 8 section 4.3
sub null_list {
my $input = shift;
return ([], $input);
}
sub star {
my $p = shift;
my $p_star;
$p_star = alternate(T(concatenate($p, parser { $p_star->(@_) }),
sub { my ($first, $rest) = @_;
[$first, @$rest];
}),
\&null_list);
}
## Chapter 8 section 4.4
sub operator {
my ($subpart, @ops) = @_;
my (@alternatives);
for my $operator (@ops) {
my ($op, $opfunc) = @$operator;
push @alternatives,
T(concatenate($op,
$subpart),
sub {
my $subpart_value = $_[1];
sub { $opfunc->($_[0], $subpart_value) }
});
}
my $result =
T(concatenate($subpart,
star(alternate(@alternatives))),
sub { my ($total, $funcs) = @_;
for my $f (@$funcs) {
$total = $f->($total);
}
$total;
});
}
## Chapter 8 section 4.7.1
sub error {
my ($checker, $continuation) = @_;
my $p;
$p = parser {
my $input = shift;
while (defined($input)) {
if (my (undef, $result) = $checker->($input)) {
$input = $result;
last;
} else {
drop($input);
}
}
return unless defined $input;
return $continuation->($input);
};
$N{$p} = "errhandler($N{$continuation} -> $N{$checker})";
return $p;
}
## Chapter 8 section 6
sub action {
my $action = shift;
return parser {
my $input = shift;
$action->($input);
return (undef, $input);
};
}
## Chapter 8 section 6
sub test {
my $action = shift;
return parser {
my $input = shift;
my $result = $action->($input);
return $result ? (undef, $input) : ();
};
}

View File

@@ -0,0 +1,107 @@
###
### Parser::Exception.pm
###
## Chapter 8 section 4.7.2
sub End_of_Input {
my $input = shift;
return (undef, undef) unless defined($input);
die ["End of input", $input];
}
sub lookfor {
my $wanted = shift;
my $value = shift || sub { $_[0][1] };
my $u = shift;
$wanted = [$wanted] unless ref $wanted;
my $parser = parser {
my $input = shift;
unless (defined $input) {
die ['TOKEN', $input, $wanted];
}
my $next = head($input);
for my $i (0 .. $#$wanted) {
next unless defined $wanted->[$i];
unless ($wanted->[$i] eq $next->[$i]) {
die ['TOKEN', $input, $wanted];
}
}
my $wanted_value = $value->($next, $u);
return ($wanted_value, tail($input));
};
$N{$parser} = "[@$wanted]";
return $parser;
}
sub alternate {
my @p = @_;
return parser { return () } if @p == 0;
return $p[0] if @p == 1;
my $p;
$p = parser {
my $input = shift;
my ($v, $newinput);
my @failures;
for (@p) {
eval { ($v, $newinput) = $_->($input) };
if ($@) {
die unless ref $@;
push @failures, $@;
} else {
return ($v, $newinput);
}
}
die ['ALT', $input, \@failures];
};
$N{$p} = "(" . join(" | ", map $N{$_}, @p) . ")";
return $p;
}
sub error {
my ($try) = @_;
my $p;
$p = parser {
my $input = shift;
my @result = eval { $try->($input) };
if ($@) {
display_failures($@) if ref $@;
die;
}
return @result;
};
}
sub display_failures {
my ($fail, $depth) = @_;
$depth ||= 0;
my $I = " " x $depth;
my ($type, $position, $data) = @$fail;
my $pos_desc = "";
while (length($pos_desc) < 40) {
if ($position) {
my $h = head($position);
$pos_desc .= "[@$h] ";
} else {
$pos_desc .= "End of input ";
last;
}
$position = tail($position);
}
chop $pos_desc;
$pos_desc .= "..." if defined $position;
if ($type eq 'TOKEN') {
print $I, "Wanted [@$data] instead of '$pos_desc'\n";
} elsif ($type eq 'End of input') {
print $I, "Wanted EOI instead of '$pos_desc'\n";
} elsif ($type eq 'ALT') {
print $I, ($depth ? "Or any" : "Any"), " of the following:\n";
for (@$data) {
display_failures($_, $depth+1);
}
}
}

View File

@@ -0,0 +1,22 @@
###
### T-continuation
###
## Chapter 8 section 8.1
sub T {
my ($parser, $transform) = @_;
my $p = sub {
my ($input, $continuation) = @_;
if (my $v = $parser->($input, $continuation)) {
$v = $transform->(@$v);
return $v;
} else {
return;
}
};
$N{$p} = $N{$parser};
return $p;
}

View File

@@ -0,0 +1,26 @@
###
### alternate-continuation
###
## Chapter 8 section 8.1
sub alternate {
my @p = @_;
return parser { return () } if @p == 0;
return $p[0] if @p == 1;
my $p;
$p = parser {
my ($input, $continuation) = @_;
for (@p) {
if (my ($v) = $_->($input, $continuation)) {
return $v;
}
}
return; # Failure
};
$N{$p} = "(" . join(" | ", map $N{$_}, @p) . ")";
return $p;
}

View File

@@ -0,0 +1,71 @@
###
### calculator
###
## Chapter 8 section 4.6
use Parser ':all';
use Lexer ':all';
my $input = allinput(\*STDIN);
my $lexer = iterator_to_stream(
make_lexer($input,
['TERMINATOR', qr/;\n*|\n+/ ],
['INT', qr/\d+/ ],
['PRINT', qr/\bprint\b/ ],
['IDENTIFIER', qr|[A-Za-z_]\w*| ],
['OP', qr#\*\*|[-=+*/()]# ],
['WHITESPACE', qr/\s+/, sub { "" } ],
)
);
## Chapter 8 section 4.6
my %VAR;
my ($base, $expression, $factor, $program, $statement, $term);
$Base = parser { $base->(@_) };
$Expression = parser { $expression->(@_) };
$Factor = parser { $factor->(@_) };
$Program = parser { $program->(@_) };
$Statement = parser { $statement->(@_) };
$Term = parser { $term->(@_) };
$program = concatenate(star($Statement), \&End_of_Input);
$statement = alternate(T(concatenate(lookfor('PRINT'),
$Expression,
lookfor('TERMINATOR')),
sub { print ">> $_[1]\n" }),
T(concatenate(lookfor('IDENTIFIER'),
lookfor(['OP', '=']),
$Expression,
lookfor('TERMINATOR')
),
sub { $VAR{$_[0]} = $_[2] }),
);
$expression =
operator($Term, [lookfor(['OP', '+']), sub { $_[0] + $_[1] }],
[lookfor(['OP', '-']), sub { $_[0] - $_[1] }]);
$term =
operator($Factor, [lookfor(['OP', '*']), sub { $_[0] * $_[1] }],
[lookfor(['OP', '/']), sub { $_[0] / $_[1] }]);
$factor = T(concatenate($Base,
alternate(T(concatenate(lookfor(['OP', '**']),
$Factor),
sub { $_[1] }),
T(\&nothing, sub { 1 }))),
sub { $_[0] ** $_[1] });
$base = alternate(lookfor('INT'),
lookfor('IDENTIFIER',
sub { $VAR{$_[0][1]} || 0 }),
T(concatenate(lookfor(['OP', '(']),
$Expression,
lookfor(['OP', ')'])),
sub { $_[1] })
);
$program->($lexer);

View File

@@ -0,0 +1,16 @@
###
### concatenate-continuation
###
## Chapter 8 section 8.1
sub concatenate {
my (@p) = @_;
return \&nothing if @p == 0;
my $p0 = shift @p;
return $p0 if @p == 0;
return concatenate2($p0, concatenate(@p));
}

View File

@@ -0,0 +1,28 @@
###
### concatenate2-continuation
###
## Chapter 8 section 8.1
sub concatenate2 {
my ($A, $B) = @_;
my $p;
$p = parser {
my ($input, $continuation) = @_;
my ($aval, $bval);
my $BC = parser {
my ($newinput) = @_;
return unless ($bval) = $B->($newinput, $continuation);
};
$N{$BC} = "$N{$B} $N{$continuation}";
if (($aval) = $A->($input, $BC)) {
return ([$aval, $bval]);
} else {
return;
}
};
$N{$p} = "$N{$A} $N{$B}";
return $p;
}

125
perl/Examples/Chap8/dqp.pl Normal file
View File

@@ -0,0 +1,125 @@
###
### dqp.pl
###
## Chapter 8 section 7.1
use Lexer ':all';
sub lex_input {
my @input = @_;
my $input = sub { shift @input };
my $lexer = iterator_to_stream(
make_lexer($input,
['STRING', qr/' (?: \\. | [^'] )* '
|" (?: \\. | [^"] )* " /sx,
sub { my $s = shift;
$s =~ s/.//; $s =~ s/.$//;
$s =~ s/\\(.)/$1/g;
['STRING', $s] } ],
['FIELD', qr/[A-Z]+/ ],
['AND', qr/&/ ],
['OR', qr/\|/ ],
['OP', qr/[!<>=]=|[<>=]/,
sub { $_[0] =~ s/^=$/==/;
[ 'OP', $_[0] ] } ],
['LPAREN', qr/[(]/ ],
['RPAREN', qr/[)]/ ],
['NUMBER', qr/\d+ (?:\.\d*)? | \.\d+/x ],
['SPACE', qr/\s+/, sub { "" } ],
)
);
}
## Chapter 8 section 7.2
use Parser ':all';
use FlatDB_Composable qw(query_or query_and);
my ($cquery, $squery, $term);
my $CQuery = parser { $cquery->(@_) };
my $SQuery = parser { $squery->(@_) };
my $Term = parser { $term->(@_) };
use FlatDB;
## Chapter 8 section 7.2
$cquery = operator($Term, [lookfor('OR'), \&query_or]);
$term = operator($SQuery, [lookfor('AND'), \&query_and]);
## Chapter 8 section 7.2
# This needs to be up here so that the following $squery
# definition can see $parser_dbh
my $parser_dbh;
sub set_parser_dbh { $parser_dbh = shift }
sub parser_dbh { $parser_dbh }
## Chapter 8 section 7.2
$squery = alternate(
T(concatenate(lookfor('LPAREN'),
$CQuery,
lookfor('RPAREN'),
),
sub { $_[1] }),
T(concatenate(lookfor('FIELD'),
lookfor('OP'),
lookfor('NUMBER')),
sub {
my ($field, $op, $val) = @_;
my $cmp_code = 'sub { $_[0] OP $_[1] }';
$cmp_code =~ s/OP/$op/;
my $cmp = eval($cmp_code) or die;
my $cb = sub { my %F = @_;
$cmp->($F{$field}, $val)};
$parser_dbh->callbackquery($cb);
}),
## Chapter 8 section 7.2
T(concatenate(lookfor('FIELD'),
lookfor('OP'),
lookfor('STRING')),
sub {
if ($_[1] eq '==') {
$parser_dbh->query($_[0], $_[2]);
} else {
my ($field, $op, $val) = @_;
my $cmp_code = 'sub { $_[0] OP $_[1] }';
$cmp_code =~ s/OP/$string_version{$op}/;
my $cmp = eval($cmp_code) or die;
my $cb = sub { my %F = @_;
$cmp->($F{$field}, $val)};
$parser_dbh->callbackquery($cb);
}
}),
);
my %string_version = ('>' => 'gt', '>=', => 'ge', '==' => 'eq',
'<' => 'lt', '<=', => 'le', '!=' => 'ne');
package FlatDB::Parser;
use base FlatDB_Composable;
sub parse_query {
my $self = shift;
my $query = shift;
my $lexer = main::lex_input($query);
my $old_parser_dbh = main::parser_dbh();
main::set_parser_dbh($self);
my ($result) = $cquery->($lexer);
main::set_parser_dbh($old_parser_dbh);
return $result;
}

View File

@@ -0,0 +1,32 @@
###
### simple-expr-parser-2.pl
###
## Chapter 8 section 4
use Parser ':all';
use Lexer ':all';
my ($expression, $term, $factor);
my $Expression = parser { $expression->(@_) };
my $Term = parser { $term ->(@_) };
my $Factor = parser { $factor ->(@_) };
$expression = alternate(concatenate($Term,
lookfor(['OP', '+']),
$Expression),
$Term);
$term = alternate(concatenate($Factor,
lookfor(['OP', '*']),
$Term),
$Factor);
$factor = alternate(lookfor('INT'),
concatenate(lookfor(['OP', '(']),
$Expression,
lookfor(['OP', ')']))
);
$entire_input = concatenate($Expression, \&End_of_Input);

View File

@@ -0,0 +1,42 @@
###
### simple-expr-parser.pl
###
## Chapter 8 section 4
use Parser ':all';
use Lexer ':all';
my $expression;
my $Expression = parser { $expression->(@_) };
$expression = alternate(concatenate(lookfor('INT'),
lookfor(['OP', '+']),
$Expression),
concatenate(lookfor('INT'),
lookfor(['OP', '*']),
$Expression),
concatenate(lookfor(['OP', '(']),
$Expression,
lookfor(['OP', ')'])),
lookfor('INT'));
my $entire_input = concatenate($Expression, \&End_of_Input);
my @input = q[2 * 3 + (4 * 5)];
my $input = sub { return shift @input };
my $lexer = iterator_to_stream(
make_lexer($input,
['TERMINATOR', qr/;\n*|\n+/ ],
['INT, qr/\d+/ ],
['PRINT', qr/\bprint\b/ ],
['IDENTIFIER', qr|[A-Za-z_]\w*| ],
['OP', qr#\*\*|[-=+*/()]# ],
['WHITESPACE', qr/\s+/, sub { "" } ],
)
);
if (my ($result, $remaining_input) = $entire_input->($lexer)) {
use Data::Dumper;
print Dumper($result), "\n";
} else {
warn "Parse error.\n";
}

View File

@@ -0,0 +1,18 @@
###
### iterator-to-stream.pl
###
## Chapter 8 section 1.4
use Stream 'node';
sub iterator_to_stream {
my $it = shift;
my $v = $it->();
return unless defined $v;
node($v, sub { iterator_to_stream($it) });
}
1;

View File

@@ -0,0 +1,36 @@
###
### lookfor-continuation
###
## Chapter 8 section 8.1
sub lookfor {
my $wanted = shift;
my $value = shift || sub { $_[0] };
my $u = shift;
$wanted = [$wanted] unless ref $wanted;
my $parser = parser {
my ($input, $continuation) = @_;
return unless defined $input;
my $next = head($input);
for my $i (0 .. $#$wanted) {
next unless defined $wanted->[$i];
return unless $wanted->[$i] eq $next->[$i];
}
my $wanted_value = $value->($next, $u);
# Try continuation
if (my ($v) = $continuation->(tail($input))) {
return $wanted_value;
} else {
return;
}
};
$N{$parser} = "[@$wanted]";
return $parser;
}

View File

@@ -0,0 +1,12 @@
###
### nothing-continuation
###
## Chapter 8 section 8.1
sub nothing {
my ($input, $continuation) = @_;
return $continuation->($input);
}

View File

@@ -0,0 +1,28 @@
###
### operator-singleop
###
## Chapter 8 section 4.4
sub operator {
my ($subpart, $op, $opfunc) = @_;
# Build and return parser like the one above
T(concatenate($subpart, star(T(concatenate($op,
$subpart),
sub {
my $subpart_value = $_[1];
sub { $opfunc->($_[0], $subpart_value };
}),
)),
sub { my ($total, $funcs) = @_;
for my $f (@$funcs) {
$total = $f->($total);
}
$total;
}
);
}

View File

@@ -0,0 +1,72 @@
###
### outline-parser
###
## Chapter 8 section 6
use Lexer ':all';
use Stream 'node';
my ($tree, $subtree);
sub outline_to_array {
my @input = @_;
my $input = sub { shift @input };
my $lexer = iterator_to_stream(
make_lexer($input,
['ITEM', qr/^.*$/m ],
['NEWLINE', qr/\n+/ , sub { "" } ],
)
);
my ($result) = $tree->($lexer);
return $result;
}
## Chapter 8 section 6
use Parser ':all';
use Stream 'head';
my $Tree = parser { $tree->(@_) };
my $Subtree = parser { $subtree->(@_) };
my $LEVEL = 0;
$tree = concatenate(lookfor('ITEM', sub { trim($_[0][1]) }),
action(sub { $LEVEL++ }),
star($Subtree),
action(sub { $LEVEL-- }));
my $BULLET = '[#*ox.+-]\s+';
sub trim {
my $s = shift;
$s =~ s/^ *//;
$s =~ s/^$BULLET//o;
return $s;
}
## Chapter 8 section 6
$tree = T(concatenate(lookfor('ITEM', sub { trim($_[0][1]) }),
action(sub { $LEVEL++; return 1; }),
star($Subtree),
action(sub { $LEVEL--; return 1; })),
sub { [ $_[0], @{$_[2]} ] });
$subtree = T(concatenate(test(sub {
my $input = shift;
return unless $input;
my $next = head($input);
return unless $next->[0] eq 'ITEM';
return level_of($next->[1]) >= $LEVEL;
}),
$Tree,
),
sub { $_[1] });
sub level_of {
my ($space) = $_[0] =~ /^( *)/;
return length($space)/2;
}

View File

@@ -0,0 +1,46 @@
###
### outline-parser-2
###
## Chapter 8 section 6
my @LEVEL;
$tree = T(concatenate(T(lookfor('ITEM', sub { $_[0] }),
sub {
my $s = $_[1];
push @LEVEL, level_of($s);
return trim($s);
}),
star($Subtree),
action(sub { pop @LEVEL })),
sub { [ $_[0], @{$_[1]} ]},
);
## Chapter 8 section 6
$subtree = T(concatenate(test(sub {
my $input = shift;
return unless $input;
my $next = head($input);
return unless $next->[0] eq 'ITEM';
return level_of($next->[1]) > $LEVEL[-1];
}),
$Tree,),
sub { $_[1] });
my $PREFIX;
sub level_of {
my $count = 0;
my $s = shift;
if (! defined $PREFIX) {
($PREFIX) = $s =~ /^(\s*)/;
}
$s =~ s/^$PREFIX//o
or die "Item '$s' wasn't indented the same as the previous items.\n";
my ($indent) = $s =~ /^(\s*)/;
my $level = length($indent);
return $level;
}

View File

@@ -0,0 +1,12 @@
###
### regex-g-demo
###
## Chapter 8 section 1.2
my $target = "123..45.6789...0";
while ($target =~ /(\d+)/g) {
print "Saw '$1' ending at position ", pos($target), "\n";
}

View File

@@ -0,0 +1,85 @@
###
### regex-parser
###
## Chapter 8 section 5
use Lexer ':all';
use Stream 'node';
my ($regex, $alternative, $atom, $qatoms);
sub regex_to_stream {
my @input = @_;
my $input = sub { shift @input };
my $lexer = iterator_to_stream(
make_lexer($input,
['PAREN', qr/[()]/,],
['QUANT', qr/[*+?]/ ],
['BAR', qr/[|]/, ],
['ATOM', qr/\\x[0-9a-fA-F]{0,2} # hex escape
|\\\d+ # octal escape
|\\. # other \
|. # other char
/x, ],
)
);
my ($result) = $regex->($lexer);
return $result;
}
## Chapter 8 section 5
use Parser ':all';
my $Regex = parser { $regex ->(@_) };
my $Alternative = parser { $alternative->(@_) };
my $Atom = parser { $atom ->(@_) };
my $QAtom = parser { $qatom ->(@_) };
## Chapter 8 section 5
use Regex;
# regex -> alternative 'BAR' regex | alternative
$regex = alternate(T(concatenate($Alternative,
lookfor('BAR'),
$Regex),
sub { Regex::union($_[0], $_[2]) }),
$Alternative);
# alternative -> qatom alternative | (nothing)
$alternative = alternate(T(concatenate($QAtom, $Alternative),
\&Regex::concat),
T(\&nothing, sub { Regex::literal("") }));
my %quant;
# qatom -> atom ('QUANT' | (nothing))
$qatom = T(concatenate($Atom,
alternate(lookfor('QUANT'),
\&nothing),
),
sub { my ($at, $q) = @_;
defined $q ? $quant{$q}->($at) : $at });
%quant = ('*' => \&Regex::star,
'+' => \&Regex::plus,
'?' => \&Regex::query,
);
sub query {
my $s = shift;
union(literal(""), $s);
}
# atom -> 'ATOM' | '(' regex ')'
$atom = alternate(lookfor("ATOM", sub { Regex::literal($_[0][1]) }),
T(concatenate(lookfor(["PAREN", "("]),
$Regex,
lookfor(["PAREN", ")"]),
),
sub { $_[1] }),
);

View File

@@ -0,0 +1,42 @@
###
### tokens
###
## Chapter 9 section 1.3
sub tokens {
my ($input, $label, $pattern) = @_;
my @tokens;
my ($buf, $finished) = ("");
my $split = sub { split /($pattern)/, $_[0] };
my $maketoken = sub { [$label, $_[0] ]};
sub {
while (@tokens == 0 && ! $finished) {
my $i = $input->();
if (ref $i) { # Input has already been tokenized
my ($sep, $tok) = $split->($buf);
$tok = $maketoken->($tok) if defined $tok;
push @tokens, grep $_ ne "", $sep, $tok, $i;
$buf = "";
} else { # Input is an untokenized string
$buf .= $i if defined $i; # Append new input to buffer
my @newtoks = $split->($buf);
while (@newtoks > 2
|| @newtoks && ! defined $i) {
# Buffer contains complete separator plus complete token
# OR we've reached the end of the input
push @tokens, shift(@newtoks);
push @tokens, $maketoken->(shift @newtoks) if @newtoks;
}
# Reassemble remaining contents of buffer
$buf = join "", @newtoks;
$finished = 1 if ! defined $i;
@tokens = grep $_ ne "", @tokens;
}
}
return shift(@tokens);
}
}

View File

@@ -0,0 +1,24 @@
###
### tokens-calc
###
## Chapter 8 section 1.2
sub tokens {
my $target = shift;
return sub {
TOKEN: {
return ['INTEGER', $1] if $target =~ /\G (\d+) /gcx;
return ['PRINT'] if $target =~ /\G print \b /gcx;
return ['IDENTIFIER', $1] if $target =~ /\G ([A-Za-z_]\w*)/gcx;
return ['OPERATOR', $1] if $target =~ /\G (\*\*) /gcx;
return ['OPERATOR', $1] if $target =~ /\G ([-+*\/=()]) /gcx;
return ['TERMINATOR', $1] if $target =~ /\G (; \n* | \n+) /gcx;
redo TOKEN if $target =~ /\G \s+ /gcx;
return ['UNKNOWN', $1] if $target =~ /\G (.) /gcx;
return;
}
};
}