Skip to content
This repository has been archived by the owner on Feb 3, 2021. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
Add greedy form of separator quantifier. Clean up charlist a bit.
  • Loading branch information
pmichaud committed Oct 9, 2009
1 parent f12bd0c commit 0b0e14d
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 14 deletions.
10 changes: 10 additions & 0 deletions src/PAST/Compiler-Regex.pir
Expand Up @@ -632,6 +632,13 @@ second child of this node.
q2label = self.'post_new'('Label', 'result'=>$S0)
cpost = self.'concat'(node)

.local pmc seppast, seppost
null seppost
seppast = node.'sep'()
unless seppast goto have_seppost
seppost = self.'post_regex'(seppast)
have_seppost:

$S0 = max
.local int needrep
$I0 = isgt min, 1
Expand Down Expand Up @@ -675,6 +682,9 @@ second child of this node.
greedy_4:
unless max != 1 goto greedy_5
self.'!cursorop'(ops, '!mark_push', 0, rep, pos, btreg)
if null seppost goto greedy_4a
ops.'push'(seppost)
greedy_4a:
ops.'push_pirop'('goto', q1label)
greedy_5:
ops.'push'(q2label)
Expand Down
7 changes: 7 additions & 0 deletions src/PAST/Regex.pir
Expand Up @@ -55,6 +55,13 @@ for regular expressions.
.end
# sep([value])
#
# Accessor method for this node's 'sep' attribute.  Elsewhere in this
# change the attribute is written with the AST of the atom that follows
# a `**` quantifier ($past.sep(...)) and read back by the regex compiler
# (node.'sep'()) to emit the separator match between repetitions.
#
# Parameters:
#   value     - optional PMC to store as the 'sep' attribute.
#   has_value - filled in by the calling conventions (:opt_flag); nonzero
#               only when the caller actually supplied `value`.
#
# Returns whatever self.'attr' returns.
# NOTE(review): 'attr' is defined outside this view -- presumably it stores
# `value` when has_value is true and fetches the current value otherwise;
# confirm against its definition.
.sub 'sep' :method
.param pmc value :optional
.param int has_value :opt_flag
# Tail call: all results of 'attr' are handed straight back to our caller.
.tailcall self.'attr'('sep', value, has_value)
.end
.sub 'subtype' :method
.param pmc value :optional
.param int has_value :opt_flag
Expand Down
12 changes: 9 additions & 3 deletions src/Regex/P6Regex/Actions.pm
Expand Up @@ -75,9 +75,15 @@ method quantifier:sym<?>($/) {

method quantifier:sym<**>($/) {
my $past := $<quantmod>.ast;
$past.min(+$<min>);
if ! $<max> { $past.max(+$<min>); }
elsif $<max>[0] ne '*' { $past.max(+$<max>[0]); }
if $<quantified_atom> {
$past.min(1);
$past.sep($<quantified_atom>.ast);
}
else {
$past.min(+$<min>);
if ! $<max> { $past.max(+$<min>); }
elsif $<max>[0] ne '*' { $past.max(+$<max>[0]); }
}
make $past;
}

Expand Down
7 changes: 4 additions & 3 deletions src/Regex/P6Regex/Grammar.pm
Expand Up @@ -44,9 +44,10 @@ grammar Regex::P6Regex::Grammar is PCT::Grammar;
token quantifier:sym<+> { $<sym>=['+'] <quantmod> {*} }
token quantifier:sym<?> { $<sym>=['?'] <quantmod> {*} }
token quantifier:sym<**> {
$<sym>=['**'] <quantmod>
$<sym>=['**'] \s* <quantmod> \s*
[
| $<min>=[\d+] [ '..' $<max>=[\d+|'*'] ]?
|| $<min>=[\d+] [ '..' $<max>=[\d+|'*'] ]?
|| <quantified_atom>
]
{*}
}
Expand Down Expand Up @@ -109,7 +110,7 @@ grammar Regex::P6Regex::Grammar is PCT::Grammar;
[
| '[' $<charspec>=(
| '-' <.obs: "hyphen in enumerated character class;..">
| [ \\ (.) | (<-[\]]>) ] [ '..' (.) ]?
| [ \\ (.) | (<-[\]\\]>) ] [ '..' (.) ]?
)*
']'
| $<name>=[\w+]
Expand Down
4 changes: 2 additions & 2 deletions t/p6regex/rx_charclass
Expand Up @@ -24,7 +24,7 @@
<-[b..d]> bbccdd n negated character range
# todo :pge<reversed character range>
<-[d..b]> bbccdd /parse error/ illegal character range
<[-]> ab-def /parse error/ unescaped hyphen
<[-]> ab-def /Obsolete/ unescaped hyphen
<[\-]> ab-def y escaped hyphen
<[\-]> abcdef n escaped hyphen
<-[\-]> ---x-- y negated escaped hyphen
Expand All @@ -41,7 +41,7 @@
<-[+\-]> ------ n negated escaped hyphen in range
<["\\]> \\ y escaped backslash
<[\]]> ] y escaped close bracket
<[\]> \\]] /parse error/ unescaped backslash (or no closing brace)
<[\]> \\]] /error/ unescaped backslash (or no closing brace)
^\><[<]> >< y lt character class
^<[>]>\< >< y gt character class
# todo :pugs<feature>
Expand Down
12 changes: 6 additions & 6 deletions t/p6regex/rx_quantifiers
Expand Up @@ -181,9 +181,9 @@ a**:?2..4 baaabbb y two "a" characters (non-greedy)
a**!2..4 baaabbb y three "a" characters (explicit greed)
a**:!2..4 baaabbb y three "a" characters (explicit greed)

^\w+ ** ','$ foo,bar,baz y ** with a term
^\w+ **? ',' ....$ foo,bar,baz y **? with a term
^\w+ **[','\s*]$ foo, bar, baz y ** with term + ws
:sigspace ^\w+ ** ',' $ foo, bar ,baz y ** under :sigspace
:sigspace ^\w+**',' $ foo, bar ,baz n ** under :sigspace w/o ws
:sigspace ^\w+**',' $ foo,bar,baz y ** under :sigspace w/o ws
^[\w+] ** \,$ foo,bar,baz y ** with a term
^[\w+] **? \, ....$ foo,bar,baz y **? with a term
^[\w+] ** [\,\s*]$ foo, bar, baz y ** with term + ws
:sigspace ^[\w+] ** \, $ foo, bar ,baz y ** under :sigspace
:sigspace ^[\w+]** \, $ foo, bar ,baz n ** under :sigspace w/o ws
:sigspace ^[\w+]** \, $ foo,bar,baz y ** under :sigspace w/o ws

0 comments on commit 0b0e14d

Please sign in to comment.