Skip to content
This repository has been archived by the owner on Feb 3, 2021. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
Some code for naive token prefixes for regexes.
  • Loading branch information
pmichaud committed Oct 22, 2009
1 parent 9681684 commit efe456a
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 80 deletions.
23 changes: 22 additions & 1 deletion src/PAST/Compiler-Regex.pir
Expand Up @@ -63,7 +63,9 @@ Return the POST representation of the regex AST rooted by C<node>.
goto iter_loop
iter_done:

.local pmc startlabel, donelabel, faillabel
.local pmc peeklabel, startlabel, donelabel, faillabel
$S0 = concat prefix, 'peek'
peeklabel = self.'post_new'('Label', 'result'=>$S0)
$S0 = concat prefix, 'start'
startlabel = self.'post_new'('Label', 'result'=>$S0)
$S0 = concat prefix, 'done'
Expand Down Expand Up @@ -95,6 +97,25 @@ Return the POST representation of the regex AST rooted by C<node>.
.local string cur, rep, pos, tgt, off, eos
(cur, rep, pos, tgt, off, eos) = self.'!rxregs'('cur rep pos tgt off eos')

.local pmc peek
.local int peek_len
(peek :slurpy) = node.'prefix'('')
peek_len = elements peek
$I0 = 0
peek_loop:
unless $I0 < peek_len goto peek_done
$S0 = peek[$I0]
$S0 = self.'escape'($S0)
peek[$I0] = $S0
inc $I0
goto peek_loop
peek_done:
ops.'push_pirop'('getattribute', '$P10', 'self', '"$!type"')
ops.'push_pirop'('if_null', '$P10', peeklabel)
ops.'push_pirop'('ne', '$P10', CURSOR_TYPE_PEEK, peeklabel)
ops.'push_pirop'('return', peek :flat)
ops.'push'(peeklabel)

$S0 = concat '(', cur
concat $S0, ', '
concat $S0, pos
Expand Down
213 changes: 135 additions & 78 deletions src/PAST/Regex.pir
Expand Up @@ -84,101 +84,158 @@ for regular expressions.
.end
=item peek()
=item prefix()
Returns the prefixes associated with the regex tree rooted
at this node.
=cut
.sub 'peek' :method
.local pmc list
list = new ['ResizablePMCArray']
$I0 = isa self, ['PAST';'Regex']
unless $I0 goto peek_stop
.local pmc child_it
child_it = self.'iterator'()
.sub 'prefix' :method
.param string prefix
.param pmc tail :slurpy
.local string pasttype
pasttype = self.'pasttype'()
if pasttype == 'concat' goto concat
if pasttype == '' goto concat
if pasttype == 'literal' goto literal
if pasttype == 'alt' goto alt
if pasttype == 'alt_longest' goto alt_longest
peek_stop:
list = 0
.return (list)
peek_zero:
list = 1
list[0] = ''
.return (list)
# temporal alternation returns the prefixes of its first child
alt:
unless child_it goto peek_stop
$P0 = shift child_it
.tailcall 'peek'($P0)
if pasttype goto have_pasttype
pasttype = 'concat'
have_pasttype:
# declarative alternation returns prefixes of all children
alt_longest:
unless child_it goto alt_longest_done
$P0 = shift child_it
$P1 = 'peek'($P0)
$I0 = elements list
splice list, $P1, $I0, 0
goto alt_longest
alt_longest_done:
.return (list)
concat:
unless child_it goto peek_zero
$P0 = shift child_it
list = 'peek'($P0)
unless list goto peek_stop
concat_loop:
unless child_it goto concat_done
.local pmc catlist
if pasttype == 'scan' goto prefix_skip
$S0 = concat 'prefix_', pasttype
$I0 = can self, $S0
unless $I0 goto prefix_done
.tailcall self.$S0(prefix, tail)
prefix_skip:
unless tail goto prefix_done
.local pmc head
head = shift tail
.tailcall head.'prefix'(prefix, tail :flat)
prefix_done:
.return (prefix)
.end
.sub 'prefix_alt' :method
.param string prefix
.param pmc tail
.local pmc child_it, results
child_it = self.'iterator'()
results = new ['ResizablePMCArray']
child_loop:
unless child_it goto child_done
$P0 = shift child_it
catlist = 'peek'($P0)
unless catlist goto concat_done
# concatenate all elements of list with catlist
.local pmc newlist, i1, i2
newlist = new ['ResizablePMCArray']
i1 = iter list
concat_i1_loop:
unless i1 goto concat_i1_done
$S1 = shift i1
i2 = iter catlist
concat_i2_loop:
unless i2 goto concat_i1_loop
$S2 = shift i2
$S2 = concat $S1, $S2
push newlist, $S2
goto concat_i2_loop
concat_i1_done:
list = newlist
goto concat_loop
concat_done:
.return (list)
($P1 :slurpy) = $P0.'prefix'(prefix, tail :flat)
splice results, $P1, 0, 0
goto child_loop
child_done:
.return (results :flat)
.end
# prefix_alt_longest(prefix, tail)
#
# Prefix computation for an 'alt_longest' node.  It is handled exactly
# like 'alt': the work is delegated to prefix_alt, and whatever that
# method returns is returned to the caller.
#
#   prefix - the literal prefix accumulated so far
#   tail   - array of the nodes that follow this one
.sub 'prefix_alt_longest' :method
    .param string prefix
    .param pmc tail
    # prefix_alt declares 'tail' as one ordinary (non-slurpy) PMC
    # parameter, so the array has to be passed intact.  Flattening it
    # would make the number of arguments depend on the length of tail.
    .tailcall self.'prefix_alt'(prefix, tail)
.end
# prefix_concat(prefix, tail)
#
# Prefix computation for a concatenation node.  The node's children
# are inserted at the front of 'tail', then the first pending node (if
# any) is asked to continue the computation with the same prefix.
# When nothing is pending the accumulated prefix is the result.
#
#   prefix - the literal prefix accumulated so far
#   tail   - array of the nodes that follow this one (modified in place)
.sub 'prefix_concat' :method
    .param string prefix
    .param pmc tail

    .local pmc children, head_node
    children = self.'list'()
    # insert the children ahead of whatever was already pending
    splice tail, children, 0, 0

    if tail goto descend
    # no children and no pending nodes: the prefix ends here
    .return (prefix)

  descend:
    head_node = shift tail
    .tailcall head_node.'prefix'(prefix, tail :flat)
.end
.sub 'prefix_literal' :method
.param string prefix
.param pmc tail
.local pmc lpast
lpast = self[0]
$I0 = isa lpast, ['String']
unless $I0 goto done
literal:
$P0 = self[0]
$I0 = isa $P0, 'String'
if $I0 goto literal_constant
.return (list)
literal_constant:
push list, $P0
.return (list)
.local string subtype
subtype = self.'subtype'()
if subtype == 'ignorecase' goto done
$S0 = lpast
prefix = concat prefix, $S0
unless tail goto done
$P0 = shift tail
.tailcall $P0.'prefix'(prefix, tail :flat)
done:
.return (prefix)
.end
# prefix_enumcharlist(prefix, tail)
#
# Prefix computation for an enumerated character list node.  The
# characters are taken from self[0].
#
#   * If the node is negated, the accumulated prefix is returned as is.
#   * If there are following nodes and the node is not 'zerowidth',
#     each character is appended to the prefix in turn and the first
#     following node continues the computation for each candidate; all
#     of the results are collected and returned.
#   * Otherwise (no following nodes, or a 'zerowidth' node) one result
#     per character is returned, each being the accumulated prefix
#     followed by that character.
#
#   prefix - the literal prefix accumulated so far
#   tail   - array of the nodes that follow this one
.sub 'prefix_enumcharlist' :method
    .param string prefix
    .param pmc tail

    .local pmc negate
    negate = self.'negate'()
    unless negate goto negate_done
    .return (prefix)

  negate_done:
    .local string charlist
    charlist = self[0]
    unless tail goto charlist_notail
    .local string subtype
    subtype = self.'subtype'()
    if subtype == 'zerowidth' goto charlist_notail

    .local pmc result, head
    result = new ['ResizablePMCArray']
    head = shift tail

    .local int pos, eos
    eos = length charlist
    pos = 0
  charlist_loop:
    unless pos < eos goto charlist_done
    .local string char
    char = substr charlist, pos, 1
    $S0 = concat prefix, char
    ($P0 :slurpy) = head.'prefix'($S0, tail :flat)
    # new results are inserted at the front of the collection
    splice result, $P0, 0, 0
    inc pos
    goto charlist_loop
  charlist_done:
    .return (result :flat)

  charlist_notail:
    # Each character is a possible continuation of the prefix built so
    # far, so the prefix must be kept in front of it; returning the
    # bare characters would discard everything matched before this node.
    .local pmc chars
    .local int idx, count
    chars = split '', charlist
    count = elements chars
    idx = 0
  notail_loop:
    unless idx < count goto notail_done
    $S1 = chars[idx]
    $S1 = concat prefix, $S1
    chars[idx] = $S1
    inc idx
    goto notail_loop
  notail_done:
    .return (chars :flat)
.end
# prefix_subcapture(prefix, tail)
#
# Prefix computation for a 'subcapture' node.  The node is treated
# exactly like a concatenation: the call is forwarded to
# prefix_concat with the same arguments and its results are returned.
#
#   prefix - the literal prefix accumulated so far
#   tail   - array of the nodes that follow this one
.sub 'prefix_subcapture' :method
.param string prefix
.param pmc tail
# tail is passed as a single array argument (not flattened)
.tailcall self.'prefix_concat'(prefix, tail)
.end
=back
=head1 AUTHOR
Patrick Michaud <pmichaud@pobox.com> is the author and maintainer.
Expand Down
2 changes: 1 addition & 1 deletion src/Regex/Cursor.pir
Expand Up @@ -22,7 +22,7 @@ grammars.
load_bytecode 'P6object.pbc'
.local pmc p6meta
p6meta = new 'P6metaclass'
$P0 = p6meta.'new_class'('Regex::Cursor', 'attr'=>'$!target $!from $!pos $!match $!action $!names $!debug @!bstack @!cstack @!caparray')
$P0 = p6meta.'new_class'('Regex::Cursor', 'attr'=>'$!target $!from $!pos $!match $!action $!names $!debug $!type @!bstack @!cstack @!caparray')
$P0 = box 0
set_global '$!generation', $P0
$P0 = new ['Boolean']
Expand Down
3 changes: 3 additions & 0 deletions src/Regex/constants.pir
Expand Up @@ -2,3 +2,6 @@
.const int CURSOR_FAIL_GROUP = -2
.const int CURSOR_FAIL_RULE = -3
.const int CURSOR_FAIL_MATCH = -4

.const int CURSOR_TYPE_SCAN = 1
.const int CURSOR_TYPE_PEEK = 2

0 comments on commit efe456a

Please sign in to comment.