Skip to content
This repository has been archived by the owner on Feb 3, 2021. It is now read-only.

Commit

Permalink
Handle positional (numbered) captures.
Browse files Browse the repository at this point in the history
  • Loading branch information
pmichaud committed Oct 13, 2009
1 parent 8a90013 commit 5045605
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 51 deletions.
9 changes: 7 additions & 2 deletions src/PAST/Compiler-Regex.pir
Expand Up @@ -807,9 +807,13 @@ Perform a subrule call.

.local pmc name
$P0 = node.'name'()
name = self.'as_post'($P0, 'rtype'=>'~')
name = self.'as_post'($P0, 'rtype'=>'*')
ops.'push'(name)

.local pmc subpast, subpost
subpast = node[0]
subpost = self.'as_post'(subpast, 'rtype'=>'*')

.local pmc negate
.local string testop
negate = node.'negate'()
Expand All @@ -821,7 +825,8 @@ Perform a subrule call.
ops.'push_pirop'('inline', name, subtype, negate, 'inline'=>" # rx subrule %0 subtype=%1 negate=%2")

self.'!cursorop'(ops, '!cursor_pos', 0, pos)
ops.'push_pirop'('callmethod', name, cur, 'result'=>'$P10')
ops.'push'(subpost)
ops.'push_pirop'('callmethod', subpost, cur, 'result'=>'$P10')
ops.'push_pirop'(testop, '$P10', fail)
if subtype == 'zerowidth' goto done
ops.'push_pirop'('callmethod', '"pos"', '$P10', 'result'=>pos)
Expand Down
43 changes: 33 additions & 10 deletions src/Regex/Cursor.pir
Expand Up @@ -13,6 +13,8 @@ grammars.

=cut

.include 'cclass.pasm'

.namespace ['Regex';'Cursor']

.sub '' :anon :load :init
Expand Down Expand Up @@ -66,11 +68,19 @@ for the Cursor if one hasn't been created yet.
caphash = new ['Hash']
caparray_loop:
unless caparray_it goto caparray_done
$P0 = shift caparray_it
$P1 = new ['ResizablePMCArray']
match[$P0] = $P1
caphash[$P0] = $P1
.local string subname
.local pmc arr
.local int keyint
subname = shift caparray_it
arr = new ['ResizablePMCArray']
caphash[subname] = arr
keyint = is_cclass .CCLASS_NUMERIC, subname, 0
if keyint goto caparray_int
match[subname] = arr
goto caparray_loop
caparray_int:
$I0 = subname
match[$I0] = arr
caparray_done:

# If it's not a successful match, or if there are
Expand All @@ -83,20 +93,33 @@ for the Cursor if one hasn't been created yet.
cstack_it = iter cstack
cstack_loop:
unless cstack_it goto cstack_done
.local pmc subcur, subnames, submatch
.local pmc subcur, submatch
subcur = shift cstack_it
# If the subcursor isn't bound with a name, skip it
subnames = getattribute subcur, '$!names'
if null subnames goto cstack_loop
$P0 = getattribute subcur, '$!names'
if null $P0 goto cstack_loop
subname = $P0
submatch = subcur.'MATCH'()
keyint = is_cclass .CCLASS_NUMERIC, subname, 0
if null caparray goto cstack_bind
$I0 = exists caphash[subnames]
$I0 = exists caphash[subname]
unless $I0 goto cstack_bind
$P0 = match[subnames]
if keyint goto cstack_array_int
$P0 = match[subname]
push $P0, submatch
goto cstack_loop
cstack_array_int:
$I0 = subname
$P0 = match[$I0]
push $P0, submatch
goto cstack_loop
cstack_bind:
match[subnames] = submatch
if keyint goto cstack_bind_int
match[subname] = submatch
goto cstack_loop
cstack_bind_int:
$I0 = subname
match[$I0] = submatch
goto cstack_loop
cstack_done:

Expand Down
110 changes: 72 additions & 38 deletions src/Regex/P6Regex/Actions.pm
@@ -1,34 +1,27 @@
class Regex::P6Regex::Actions;

method TOP($/) {
my $regex := PAST::Regex.new(
my $rpast := $<nibbler>.ast;
my %capnames := capnames($rpast, 0);
%capnames{''} := 0;
$rpast := PAST::Regex.new(
PAST::Regex.new( :pasttype('scan') ),
$<nibbler>.ast,
$rpast,
PAST::Regex.new( :pasttype('pass') ),
:pasttype('concat'),
:capnames($<nibbler>.ast.capnames)
:capnames(%capnames)
);
my $past := PAST::Block.new( $regex, :blocktype('method') );
my $past := PAST::Block.new( $rpast, :blocktype('method') );
make $past;
}

method nibbler($/) {
my $past;
if +$<termish> > 1 {
$past := PAST::Regex.new( :pasttype('alt') );
my %capnames;
for $<termish> {
$past.push($_.ast);
my $astcap := $_.ast.capnames;
if $astcap {
for $astcap {
%capnames{$_} := %capnames{$_} == 1 && $astcap{$_} == 1
?? 1
!! $astcap{$_};
}
}
}
if %capnames { $past.capnames(%capnames); }
}
else {
$past := $<termish>[0].ast;
Expand All @@ -39,7 +32,6 @@ method nibbler($/) {
method termish($/) {
my $past := PAST::Regex.new( :pasttype('concat') );
my $lastlit := 0;
my %capnames;
for $<noun> {
my $ast := $_.ast;
if $lastlit && $ast.pasttype eq 'literal' {
Expand All @@ -48,30 +40,16 @@ method termish($/) {
else {
$past.push($ast);
$lastlit := $ast.pasttype eq 'literal' ?? $ast !! 0;
my $astcap := $ast.capnames;
if $astcap {
for $astcap {
%capnames{$_} := +%capnames{$_} + $astcap{$_};
}
}
}
}
if +$past.list == 1 { $past := $past[0]; }
elsif %capnames { $past.capnames(%capnames) }
make $past;
}

method quantified_atom($/) {
my $past := $<atom>.ast;
if $<quantifier> {
my $qast := $<quantifier>[0].ast;
if $past.capnames {
my %caphash;
for $past.capnames {
%caphash{$_} := 2;
}
$qast.capnames(%caphash);
}
$qast.unshift($past);
$past := $qast;
}
Expand Down Expand Up @@ -130,6 +108,23 @@ method metachar:sym<[ ]>($/) {
make $<nibbler>.ast;
}

# Action for a parenthesized group '(' <nibbler> ')'.
#
# The nibbler's regex AST is wrapped the same way TOP wraps a whole
# regex (scan, body, pass) and placed in its own method block, with a
# capture table computed by capnames() starting from index 0.  The
# result is a 'subrule' node of subtype 'capture' whose first child is
# that block; no name is given to the node here.
method metachar:sym<( )>($/) {
    my $inner := $<nibbler>.ast;

    # Capture table for the group's own contents; the '' slot is reset
    # to 0 after capnames() has filled it in.
    my %capnames := capnames($inner, 0);
    %capnames{''} := 0;

    my $body := PAST::Regex.new(
        PAST::Regex.new( :pasttype('scan') ),
        $inner,
        PAST::Regex.new( :pasttype('pass') ),
        :pasttype('concat'),
        :capnames(%capnames)
    );
    my $block := PAST::Block.new( $body, :blocktype('method') );

    make PAST::Regex.new( $block, :pasttype('subrule'),
                          :subtype('capture') );
}

method metachar:sym<.>($/) {
my $past := PAST::Regex.new( :pasttype('charclass'), :subtype('.') );
make $past;
Expand Down Expand Up @@ -231,31 +226,26 @@ method backslash:sym<misc>($/) {
method assertion:sym<?>($/) {
my $past := $<assertion>.ast;
$past.subtype('zerowidth');
$past.capnames(0);
make $past;
}

method assertion:sym<!>($/) {
my $past := $<assertion>.ast;
$past.negate( !$past.negate );
$past.subtype('zerowidth');
$past.capnames(0);
make $past;
}

method assertion:sym<method>($/) {
my $past := $<assertion>.ast;
$past.subtype('method');
$past.capnames(0);
make $past;
}

method assertion:sym<name>($/) {
my $name := ~$<longname>;
my %capnames;
%capnames{$name} := 1;
my $past := PAST::Regex.new( :name($name) , :pasttype('subrule'),
:capnames(%capnames) );
my $past := PAST::Regex.new( $name, :name($name) , :pasttype('subrule'),
:subtype('capture') );
make $past;
}

Expand All @@ -267,8 +257,8 @@ method cclass_elem($/) {
my $str := '';
my $past;
if $<name> {
$past := PAST::Regex.new( :name(~$<name>), :pasttype('subrule'),
:subtype('method') );
my $name := ~$<name>;
$past := PAST::Regex.new( $name, :pasttype('subrule'), :subtype('method') );
} else {
for $<charspec> {
if $_[1] {
Expand Down Expand Up @@ -300,3 +290,47 @@ method cclass_elem($/) {
$past.negate( $<sign> eq '-' );
make $past;
}


# Walk the regex AST rooted at $ast and build its capture table.
#
#   $ast   - regex AST node to inspect (its pasttype selects the case)
#   $count - index to assign to the next unnamed capturing subrule
#
# Returns a hash keyed by capture name.  A capturing subrule
# contributes 1; counts are summed across the children of a 'concat',
# merged across the branches of an 'alt' (1 only while every sighting
# is a single one, otherwise 2), and forced to 2 under a 'quant'.
# The '' entry always holds the next unused positional index.
# Unnamed capturing subrules are given their index as a name as a
# side effect.
sub capnames($ast, $count) {
    my %seen;
    my $kind := $ast.pasttype;
    if $kind eq 'alt' {
        # Each branch starts numbering from the same $count; afterwards
        # numbering resumes from the highest index any branch reached.
        my $highest := $count;
        for $ast.list {
            my %branch := capnames($_, $count);
            for %branch {
                my $single := +%seen{$_} < 2 && %branch{$_} == 1;
                %seen{$_} := $single ?? 1 !! 2;
            }
            if %branch{''} > $highest { $highest := %branch{''}; }
        }
        $count := $highest;
    }
    elsif $kind eq 'concat' {
        # Children are numbered one after another, and counts for the
        # same name add up.
        for $ast.list {
            my %part := capnames($_, $count);
            for %part {
                %seen{$_} := +%seen{$_} + %part{$_};
            }
            $count := %part{''};
        }
    }
    elsif $kind eq 'subrule' && $ast.subtype eq 'capture' {
        # A capture with an empty name takes the next positional index.
        if $ast.name eq '' {
            $ast.name($count);
            $count := $count + 1;
        }
        %seen{$ast.name} := 1;
    }
    elsif $kind eq 'quant' {
        # Everything found under a quantifier is recorded as 2.
        my %inner := capnames($ast[0], $count);
        for %inner {
            %seen{$_} := 2;
        }
        $count := %inner{''};
    }
    # Overwrites whatever the loops above stored under the '' key.
    %seen{''} := $count;
    %seen;
}
1 change: 1 addition & 0 deletions src/Regex/P6Regex/Grammar.pm
Expand Up @@ -56,6 +56,7 @@ grammar Regex::P6Regex::Grammar is PCT::Grammar;

# proto token metachar { <...> }
token metachar:sym<[ ]> { '[' <nibbler> ']' {*} }
token metachar:sym<( )> { '(' <nibbler> ')' {*} }
token metachar:sym<.> { $<sym>=['.'] {*} }
token metachar:sym<^> { $<sym>=['^'] {*} }
token metachar:sym<^^> { $<sym>=['^^'] {*} }
Expand Down
2 changes: 1 addition & 1 deletion t/p6regex/01-regex.t
Expand Up @@ -78,7 +78,7 @@ Description of the test.
push test_files, 'rx_charclass'
push test_files, 'rx_subrules'
# push test_files, 'rx_lookarounds'
# push test_files, 'rx_captures'
push test_files, 'rx_captures'
# push test_files, 'rx_modifiers'
# push test_files, 'rx_syntax'
# push test_files, 'rx_goal'
Expand Down
63 changes: 63 additions & 0 deletions t/p6regex/rx_captures
@@ -0,0 +1,63 @@
## captures
(a.)..(..) zzzabcdefzzz y basic match
(a.)..(..) zzzabcdefzzz /mob: <abcdef @ 3>/ basic $0
(a.)..(..) zzzabcdefzzz /mob 0: <ab @ 3>/ basic $1
(a.)..(..) zzzabcdefzzz /mob 1: <ef @ 7>/ basic $2
(a(b(c))(d)) abcd y nested match
(a(b(c))(d)) abcd /mob: <abcd @ 0>/ nested match
(a(b(c))(d)) abcd /mob 0: <abcd @ 0>/ nested match
(a(b(c))(d)) abcd /mob 0 0: <bc @ 1>/ nested match
(a(b(c))(d)) abcd /mob 0 0 0: <c @ 2>/ nested match
(a(b(c))(d)) abcd /mob 0 1: <d @ 3>/ nested match
((\w+)+) abcd /mob: <abcd @ 0>/ nested match
((\w+)+) abcd /mob 0: <abcd @ 0>/ nested match
((\w+)+) abcd /mob 0 0 0: <abcd @ 0>/ nested match
((\w+)+) ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz /mob: <ABCD/ nested match
((\w+)+) ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz /mob 0: <ABCD/ nested match
((\w+)+) ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz /mob 0 0 0: <ABCD/ nested match
(a) [ (bc) (d) | .* (ef) ] .* (g) abcdefg /mob 0: <a @ 0>/ alt subpattern before group
(a) [ (bc) (d) | .* (ef) ] .* (g) abcdefg /mob 1: <bc @ 1>/ alt subpattern in group
(a) [ (bc) (d) | .* (ef) ] .* (g) abcdefg /mob 2: <d @ 3>/ alt subpattern in group
(a) [ (bc) (d) | .* (ef) ] .* (g) abcdefg /mob 3: <g @ 6>/ alt subpattern after group
(a) [ (bc) (x) | .* (ef) ] .* (g) abcdefg /mob 1: <ef @ 4>/ 2nd alt subpattern in group
(a) [ (bc) (x) | .* (ef) ] .* (g) abcdefg /mob 3: <g @ 6>/ 2nd alt subpattern after group
( (.) )* abc /mob 0 1 0: <b @ 1>/ nested repeated captures
[ (.) ]* abc /mob 0 1: <b @ 1>/ nested repeated captures
( [.] )* abc /mob 0 1: <b @ 1>/ nested repeated captures
(.) (.) $7=(.) (.) $4=(.) abcdefg /mob 0: <a @ 0>/ numbered aliases $1
(.) (.) $7=(.) (.) $4=(.) abcdefg /mob 1: <b @ 1>/ numbered aliases $2
(.) (.) $7=(.) (.) $4=(.) abcdefg /mob 7: <c @ 2>/ numbered aliases $7
(.) (.) $7=(.) (.) $4=(.) abcdefg /mob 8: <d @ 3>/ numbered aliases $8
(.) (.) $7=(.) (.) $4=(.) abcdefg /mob 4: <e @ 4>/ numbered aliases $4
$1=[ (.) (.) (.) ] (.) abcdefg /mob 1: <abc @ 0>/ perl5 numbered captures $1
$1=[ (.) (.) (.) ] (.) abcdefg /mob 2: <a @ 0>/ perl5 numbered captures $1
$1=[ (.) (.) (.) ] (.) abcdefg /mob 3: <b @ 1>/ perl5 numbered captures $1
$1=[ (.) (.) (.) ] (.) abcdefg /mob 4: <c @ 2>/ perl5 numbered captures $1
$1=[ (.) (.) (.) ] (.) abcdefg /mob 5: <d @ 3>/ perl5 numbered captures $1
# todo :pugs<feature>
:s $<key>=[\w+] \= $<val>=[\S+] abc = 123 /mob<key>: <abc @ 1>/ named capture
# todo :pugs<feature>
:s $<key>=[\w+] \= $<val>=[\S+] abc = 123 /mob<val>: <123 @ 7>/ named capture
# todo :pugs<feature>
:s (\w+) $<foo>=(\w+) (\w+) abc def ghi /mob<foo>: <def @ 4>/ mixing named and unnamed capture
# todo :pugs<feature>
:s (\w+) $<foo>=(\w+) (\w+) abc def ghi /mob 1: <ghi @ 8>/ mixing named and unnamed capture
# todo :pugs<feature>
<alpha> [ \- <alpha> ]? abc def ghi /mob<alpha> 0: <a @ 0>/ multiple subrule captures in same scope
# todo :pugs<feature>
[(.)$0]+ bookkeeper y backreference
# todo :pugs<feature>
(\w+) <+ws> $0 hello hello y backreference at end of string
# todo :pugs<feature>
[(.)$0]+ bookkeeper /mob 0 0: <o @ 1>/ backref $1
# todo :pugs<feature>
[(.)$0]+ bookkeeper /mob 0 1: <k @ 3>/ backref $1
# todo :pugs<feature>
[(.)$0]+ bookkeeper /mob 0 2: <e @ 5>/ backref $1
# todo :pugs<feature>
(.)*x 123x /mob: <123x @ 0>/ repeated dot capture

$<key>=<alpha> 12ab34 /mob<key>: <a @ 2>/ alias capture
<key=alpha> 12ab34 /mob<key>: <a @ 2>/ alias capture

## vim: noexpandtab tabstop=4 shiftwidth=4

0 comments on commit 5045605

Please sign in to comment.