use v6-alpha; module Pugs::Runtime::Rule; # iterator_engine.pl - fglock # # *** NOTE *** # This file is written in a "subset" of Perl 6 # Please don't "fix" it - until the compiler is extended! #use strict; #use warnings; #use Smart::Comments; for debugging, look also at Filtered-Comments.pm =pod A "rule" function gets as argument a list: 0 - a string to match 1 - an optional "continuation" 2 - an optional "flags" hashref 'capture'=>1 means 'return whatever matches' it returns (or "yields"): undef - match failed or a hash containing: state - a "continuation" or undef bool - an "assertion" (true/false) match - the "match" tree or undef tail - the string tail or undef capture - the tree of captured things abort - the match was stopped by a { return } or a fail(), and it should not backtrack or whatever Continuations are used for backtracking. A "ruleop" function gets some arguments and returns a "rule". =cut # XXX - optimization - pass the string index around, # XXX instead of copying the whole string to $tail every time # XXX - weaken self-referential things sub ruleop::alternation { # alternation is first match (not longest). though we need a # separate longest match for tokens (putter on #perl6) # update: <%var> does longest match based on the keys length() (TimToady on #perl6) # note: the list in @$nodes can be modified at runtime my $nodes = shift @_; # XXX '= shift;' doesn't work??? #say "alternation list: @$nodes"; # die "alternation list is empty" unless ref($nodes) eq 'ARRAY' && @$nodes; return sub { ### testing alternations on : @_, $nodes return unless @$nodes; my $tail = @_[0]; # my $state = @_[1] ?? [ @{@_[1]} ] !! [ 0, 0 ]; # expanded - for debugging: my $state; if @_[1] { $state = @_[1]; #say "alt - reenter ",@$state.perl; } else { $state = [ 0, 0 ]; #say "concat - start"; } my $flags = @_[2]; my $match; #say "alt state @$state"; while ( defined $state ) { ### alternation string to match: "$tail - (node,state)=@$state" $match = $nodes[ $state[0] ]( $tail, $state[1], $flags ); ### match: $match #say "alternation bool:$match in string: @_[0]"; if ( $match ) { $state[1] = $match; } else { $state[0]++; $state[1] = 0; ### next alternation state - (node,state):@$state $state = undef if $state[0] >= @{$nodes}.elems; } $match = $state; return $match if $match || $match; } return; } } sub ruleop::concat { # note: the list in @nodes can NOT be modified at runtime # update: this is ok, because we can use <$var><$var> instead return ruleop::concat( ( shift @_ ), ruleop::concat( @_ ) ) if @_ > 2; my @nodes = @_; return sub { my $tail = @_[0]; # my @state = @_[1] ?? ( @{@_[1]} ) !! ( 0, 0 ); # expanded - for debugging: my @state; if @_[1] { @state = @{@_[1]}; #say "concat - reenter ",@state.perl; } else { @state = ( 0, 0 ); #say "concat - start"; } my $flags = @_[2]; my @matches; while (1) { @matches[0] = @nodes[0]( $tail, @state[0], $flags ); ### 1st match: @matches[0] return @matches[0] if @matches[0]; if ( ! @matches[0] ) { return unless defined @matches[0]; @state = ( @matches[0], 0 ); #say "concat - backtracking #0"; next; } @matches[1] = @nodes[1]( @matches[0], @state[1], $flags ); ### 2nd match: @matches[1] return @matches[1] if @matches[1]; if ( ! @matches[1] ) { if ( ! defined( @matches[1] ) ) { return unless defined @matches[0]; @state = ( @matches[0], 0 ); } ### backtracking - state: @state ### backtracking - match: @matches #say "concat - backtracking #1"; next; } #say "concat return tail: $tail"; my $succ; if ( ! defined( @matches[1] ) ) { $succ = [ @matches[0], 0 ] if defined @matches[0]; } else { $succ = [ @state[0], @matches[1] ]; } my $capture = []; ### capture: @matches[0],@matches[1] $capture = @matches[0]{capture} if @matches[0]; push @$capture, @{@matches[1]} if @matches[1]; undefine $capture unless @{$capture}; return { bool => 1, match => [ @matches ], tail => @matches[1], state => $succ, capture => $capture, }; } } } sub ruleop::constant { my $const = shift @_; # say "constant $const"; return sub { #say "matching constant:$const in @_[0]"; return if ! @_[0]; # return unless @_[0] ~~ m/^(\Q$const\E)(.*)/s; # return unless @_[0] ~~ perl5:m:s:/^(\Q$const\E)(.*)/; # return unless @_[0] ~~ /^ ( $const )(.*)/; # (putter on #perl6) # return unless @_[0] ~~ rx:perl5/^($const)(.*)/; my $m = substr( @_[0], 0, $const.chars ); return if $m ne $const; my $tail = substr( @_[0], $const.chars ); #say "Matched $m in $tail"; if defined @_[2] { if @_[2] { my $t = { bool => 1, match => { constant => $m ,}, capture => [ $m ], tail => $tail, } } } return { bool => 1, match => { constant => $m ,}, tail => $tail, } } } sub ruleop::null { return sub { return { bool => 1, match => 'null', ( @_[2] ?? ( @_[2] ?? ( capture => [ '' ] ) !! () ) !! () ), tail => @_[0], } } }; sub ruleop::capture { # sets the 'capture' flag and return a labeled capture # XXX - generalize to: set_flag('capture',1) my $label = shift @_; my $node = shift @_; sub { my @param = @_; $param[2] = {} unless defined $param[2]; $param[2] = { %{$param[2]}, capture=>1 }; my $match = $node( @param ); return unless $match; return if $match; my $new_match = { %$match }; $new_match = [ { $label => $match } ]; return $new_match; } } =for capture At runtime, this must return _only_ the capture set inside capture_closure: xx(xx(xx( capture_closure(..) ))) One way to do it is to post-process the match: try( xx(xx(xx( abort( capture_closure(..) ) ))) ) abort() sets a 'rule_finished' flag in the returned match, that makes it return until the start of the rule, which unsets the flag before returning. - this can also be used to do fail() and assert(), and 'no-backtracking checkpoints' =cut # experimental! sub ruleop::try { my $op = shift @_; return sub { my $match = $op( @_ ); ### abortable match... $match = 0; return $match; }; }; # experimental! sub ruleop::abort { my $op = shift @_; return sub { my $match = $op( @_ ); ### aborting match: $match $match = 1; return $match; }; }; # experimental! sub ruleop::negate { my $op = shift @_; return sub { my $tail = @_[0]; my $match = $op( @_ ); return if $match; return { bool => 1, match => 'null', tail => $tail, } }; }; # experimental! =for example # adds an 'before' or 'after' sub call, which may print a debug message ruleop::wrap( { before => sub { print "matching variable: @_[0]\n" }, after => sub { @_[0] ?? print "matched\n" !! print "no match\n" }, }, \&variable ) =cut sub ruleop::wrap { my $debug = shift @_; my $node = shift @_; sub { $debug( @_ ) if $debug; my $match = $node( @_ ); $debug( $match, @_ ) if $debug; return $match; } } # ------- higher-order ruleops sub ruleop::optional { return ruleop::alternation( [ @_[0], ruleop::null() ] ); } sub ruleop::null_or_optional { return ruleop::alternation( [ ruleop::null(), @_[0] ] ); } sub ruleop::greedy_plus { my $node = shift @_; my $alt; # perl5 version: # $alt = ruleop::concat( # $node, # ruleop::optional( sub{ $alt() } ), # ); # putter++ - fixed $alt bug $alt = ruleop::concat( $node, ruleop::optional( #$alt sub { $alt(@_) } ), ); return $alt; } sub ruleop::greedy_star { my $node = shift @_; return ruleop::optional( ruleop::greedy_plus( $node ) ); } sub ruleop::non_greedy_star { my $node = shift @_; ruleop::alternation( [ ruleop::null(), ruleop::non_greedy_plus( $node ) ] ); } sub ruleop::non_greedy_plus { my $node = shift @_; # XXX - needs optimization for faster backtracking, less stack usage return sub { my $tail = @_[0]; my $state = @_[1] || { 'state' => undef, 'op' => $node ,}; my $flags = @_[2]; # XXX - didn't work in p5 version # my $match = $state( $tail, $state, $flags ); my $match = $state( $tail, undef, $flags ); return unless $match; $match = { 'state' => $match, 'op' => ruleop::concat( $node, $state ), }; return $match; } } 1;