42 lines
1.2 KiB
Plaintext
42 lines
1.2 KiB
Plaintext
|
|
|
|
###
|
|
### tokens
|
|
###
|
|
|
|
## Chapter 9 section 1.3
|
|
|
|
|
|
sub tokens {
|
|
my ($input, $label, $pattern) = @_;
|
|
my @tokens;
|
|
my ($buf, $finished) = ("");
|
|
my $split = sub { split /($pattern)/, $_[0] };
|
|
my $maketoken = sub { [$label, $_[0] ]};
|
|
sub {
|
|
while (@tokens == 0 && ! $finished) {
|
|
my $i = $input->();
|
|
if (ref $i) { # Input has already been tokenized
|
|
my ($sep, $tok) = $split->($buf);
|
|
$tok = $maketoken->($tok) if defined $tok;
|
|
push @tokens, grep $_ ne "", $sep, $tok, $i;
|
|
$buf = "";
|
|
} else { # Input is an untokenized string
|
|
$buf .= $i if defined $i; # Append new input to buffer
|
|
my @newtoks = $split->($buf);
|
|
while (@newtoks > 2
|
|
|| @newtoks && ! defined $i) {
|
|
# Buffer contains complete separator plus complete token
|
|
# OR we've reached the end of the input
|
|
push @tokens, shift(@newtoks);
|
|
push @tokens, $maketoken->(shift @newtoks) if @newtoks;
|
|
}
|
|
# Reassemble remaining contents of buffer
|
|
$buf = join "", @newtoks;
|
|
$finished = 1 if ! defined $i;
|
|
@tokens = grep $_ ne "", @tokens;
|
|
}
|
|
}
|
|
return shift(@tokens);
|
|
}
|
|
} |