Skip to content
This repository has been archived by the owner on Feb 3, 2021. It is now read-only.

Commit

Permalink
Add more backslash sequences: \b, \e, \f, \h, \r, \t, \v and negations.
Browse files Browse the repository at this point in the history
  • Loading branch information
pmichaud committed Oct 9, 2009
1 parent cef1481 commit 1f30c77
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 8 deletions.
8 changes: 5 additions & 3 deletions STATUS
@@ -1,4 +1,4 @@
2009-10-08:
2009-10-09:

At the moment, nqp-rx is configured to build an executable called
"p6regex", which is a Perl 6 regular expression compiler for Parrot.
Expand Down Expand Up @@ -41,7 +41,7 @@ The key files for the p6regex compiler are:
src/Regex/P6Regex/Actions.pm # actions to create PAST from parse


Things that work (2009-10-08, 22h23 UTC):
Things that work (2009-10-09, 08h07 UTC):

* bare literal strings
* quantifiers *, +, ?, *:, +:, ?:, *?, +?, ??, *!, +!, ?!
Expand All @@ -52,4 +52,6 @@ Things that work (2009-10-08, 22h23 UTC):
* anchors ^, ^^, $, $$, <<, >>
* backslash-quoted punctuation
* #-comments (mostly)
* obsolete backslash sequences \A \Z \z \Q \E
* obsolete backslash sequences \A \Z \z \Q
* \b, \B, \e, \E, \f, \F, \h, \H, \r, \R, \t, \T, \v, \V
* enumerated character lists <[ab0..9]>
6 changes: 3 additions & 3 deletions src/PAST/Compiler-Regex.pir
Expand Up @@ -477,16 +477,16 @@ character list.

.local string charlist
charlist = node[0]
charlist = self.'escape'(charlist)
.local pmc negate, testop
negate = node.'negate'()
testop = self.'??!!'(negate, 'ge', 'lt')

ops.'push_pirop'('inline', charlist, negate, 'inline'=>' # rx enumcharlist %0 negate=%1')
ops.'push_pirop'('inline', negate, 'inline'=>' # rx enumcharlist negate=%0')
ops.'push_pirop'('ge', pos, eos, fail)
ops.'push_pirop'('sub', '$I10', pos, off)
ops.'push_pirop'('substr', '$S10', tgt, '$I10', 1)
$S0 = self.'escape'(charlist)
ops.'push_pirop'('index', '$I11', $S0, '$S10')
ops.'push_pirop'('index', '$I11', charlist, '$S10')
ops.'push_pirop'(testop, '$I11', 0, fail)
ops.'push_pirop'('inc', pos)
.return (ops)
Expand Down
45 changes: 44 additions & 1 deletion src/Regex/P6Regex/Actions.pm
Expand Up @@ -136,6 +136,49 @@ method backslash:sym<w>($/) {
make $past;
}

method backslash:sym<b>($/) {
    # Action for the \b / \B escape: produce an enumcharlist node whose
    # character list is the string "\b".  The uppercase spelling (\B)
    # requests the negated form of the same list.
    my $negated := $<sym> eq 'B';
    make PAST::Regex.new( "\b",
                          :pasttype('enumcharlist'),
                          :negate($negated) );
}

method backslash:sym<e>($/) {
    # Action for the \e / \E escape: produce an enumcharlist node whose
    # character list is the string "\e".  The uppercase spelling (\E)
    # requests the negated form of the same list.
    my $negated := $<sym> eq 'E';
    make PAST::Regex.new( "\e",
                          :pasttype('enumcharlist'),
                          :negate($negated) );
}

method backslash:sym<f>($/) {
    # Action for the \f / \F escape: produce an enumcharlist node whose
    # character list is the string "\f".  The uppercase spelling (\F)
    # requests the negated form of the same list.
    my $negated := $<sym> eq 'F';
    make PAST::Regex.new( "\f",
                          :pasttype('enumcharlist'),
                          :negate($negated) );
}

method backslash:sym<h>($/) {
    # Action for the \h / \H escape: produce an enumcharlist node over the
    # code points listed in the \x[...] escape below (the horizontal
    # whitespace set, per Synopsis 5).  The uppercase spelling (\H)
    # requests the negated form of the same list.
    my $negated := $<sym> eq 'H';
    my $chars   := "\x[09,20,a0,1680,180e,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,200a,202f,205f,3000]";
    make PAST::Regex.new( $chars,
                          :pasttype('enumcharlist'),
                          :negate($negated) );
}

method backslash:sym<r>($/) {
    # Action for the \r / \R escape: produce an enumcharlist node whose
    # character list is the string "\r".  The uppercase spelling (\R)
    # requests the negated form of the same list.
    my $negated := $<sym> eq 'R';
    make PAST::Regex.new( "\r",
                          :pasttype('enumcharlist'),
                          :negate($negated) );
}

method backslash:sym<t>($/) {
    # Action for the \t / \T escape: produce an enumcharlist node whose
    # character list is the string "\t".  The uppercase spelling (\T)
    # requests the negated form of the same list.
    my $negated := $<sym> eq 'T';
    make PAST::Regex.new( "\t",
                          :pasttype('enumcharlist'),
                          :negate($negated) );
}

method backslash:sym<v>($/) {
    # Action for the \v / \V escape: produce an enumcharlist node over the
    # code points listed in the \x[...] escape below (the vertical
    # whitespace set, per Synopsis 5).  The uppercase spelling (\V)
    # requests the negated form of the same list.
    my $negated := $<sym> eq 'V';
    my $chars   := "\x[0a,0b,0c,0d,85,2028,2029]";
    make PAST::Regex.new( $chars,
                          :pasttype('enumcharlist'),
                          :negate($negated) );
}


method backslash:sym<misc>($/) {
my $past := PAST::Regex.new( ~$/ , :pasttype('literal') );
make $past;
Expand Down Expand Up @@ -173,6 +216,6 @@ method cclass_elem($/) {
else { $str := $str ~ $_[0]; }
}
my $past := PAST::Regex.new( $str, :pasttype('enumcharlist') );
if $<sign> eq '-' { $past.negate(1); }
$past.negate( $<sign> eq '-' );
make $past;
}
8 changes: 7 additions & 1 deletion src/Regex/P6Regex/Grammar.pm
Expand Up @@ -64,10 +64,16 @@ grammar Regex::P6Regex::Grammar is PCT::Grammar;

# proto token backslash { <...> }
token backslash:sym<w> { $<sym>:=[<[dswnDSWN]>] {*} }
# Single-character backslash escapes with a negated uppercase twin.
# Each token accepts either the lowercase or the uppercase letter and binds
# the letter actually matched to $<sym>; {*} then hands off to the action
# method of the same name in Actions.pm, which compares $<sym> against the
# uppercase letter to decide whether the resulting character list is negated.
token backslash:sym<b> { $<sym>:=[<[bB]>] {*} }
token backslash:sym<e> { $<sym>:=[<[eE]>] {*} }
token backslash:sym<f> { $<sym>:=[<[fF]>] {*} }
token backslash:sym<h> { $<sym>:=[<[hH]>] {*} }
token backslash:sym<r> { $<sym>:=[<[rR]>] {*} }
token backslash:sym<t> { $<sym>:=[<[tT]>] {*} }
token backslash:sym<v> { $<sym>:=[<[vV]>] {*} }
token backslash:sym<A> { 'A' <.obs: '\\A as beginning-of-string matcher;^'> }
token backslash:sym<z> { 'z' <.obs: '\\z as end-of-string matcher;$'> }
token backslash:sym<Z> { 'Z' <.obs: '\\Z as end-of-string matcher;\\n?$'> }
token backslash:sym<E> { 'E' <.obs: '\\E as quotemeta;quotes or literal variable match'> }
token backslash:sym<Q> { 'Q' <.obs: '\\Q as quotemeta;quotes or literal variable match'> }
token backslash:sym<misc> { \W {*} }

Expand Down

0 comments on commit 1f30c77

Please sign in to comment.