plan9port/src/libregexp/rregexec.c
Russ Cox a7511dd43d libregexp: fix ambiguous match selection
echo SYSSYSR1 | sed 's/SYS.+/sysr1/'

was producing SYSsysr1 instead of sysr1.
Bug was introduced during overflow cleanup earlier this year.

Also bring regexec.c and rregexec.c into sync again.
Also allocate large enough lists in the regexec2/rregexec2 case.
2007-12-07 15:32:45 -05:00

222 lines
4.8 KiB
C

#include "lib9.h"
#include "regexp9.h"
#include "regcomp.h"
/*
* return 0 if no match
* >0 if a match
* <0 if we ran out of _relist space
*/
static int
rregexec1(Reprog *progp, /* program to run */
Rune *bol, /* string to run machine on */
Resub *mp, /* subexpression elements */
int ms, /* number of elements at mp */
Reljunk *j)
{
int flag=0;
Reinst *inst;
Relist *tlp;
Rune *s;
int i, checkstart;
Rune r, *rp, *ep;
Relist* tl; /* This list, next list */
Relist* nl;
Relist* tle; /* ends of this and next list */
Relist* nle;
int match;
Rune *p;
match = 0;
checkstart = j->starttype;
if(mp)
for(i=0; i<ms; i++) {
mp[i].s.rsp = 0;
mp[i].e.rep = 0;
}
j->relist[0][0].inst = 0;
j->relist[1][0].inst = 0;
/* Execute machine once for each character, including terminal NUL */
s = j->rstarts;
do{
/* fast check for first char */
if(checkstart) {
switch(j->starttype) {
case RUNE:
p = runestrchr(s, j->startchar);
if(p == 0 || (j->reol && p >= j->reol))
return match;
s = p;
break;
case BOL:
if(s == bol)
break;
p = runestrchr(s, '\n');
if(p == 0 || (j->reol && p+1 >= j->reol))
return match;
s = p+1;
break;
}
}
r = *s;
/* switch run lists */
tl = j->relist[flag];
tle = j->reliste[flag];
nl = j->relist[flag^=1];
nle = j->reliste[flag];
nl->inst = 0;
/* Add first instruction to current list */
if(match == 0)
_rrenewemptythread(tl, tle, progp->startinst, ms, s);
/* Execute machine until current list is empty */
for(tlp=tl; tlp->inst; tlp++){
for(inst = tlp->inst; ; inst = inst->u2.next){
switch(inst->type){
case RUNE: /* regular character */
if(inst->u1.r == r)
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
return -1;
break;
case LBRA:
tlp->se.m[inst->u1.subid].s.rsp = s;
continue;
case RBRA:
tlp->se.m[inst->u1.subid].e.rep = s;
continue;
case ANY:
if(r != '\n')
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
return -1;
break;
case ANYNL:
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
return -1;
break;
case BOL:
if(s == bol || *(s-1) == '\n')
continue;
break;
case EOL:
if(s == j->reol || r == 0 || r == '\n')
continue;
break;
case CCLASS:
ep = inst->u1.cp->end;
for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
if(r >= rp[0] && r <= rp[1]){
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
return -1;
break;
}
break;
case NCCLASS:
ep = inst->u1.cp->end;
for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
if(r >= rp[0] && r <= rp[1])
break;
if(rp == ep)
if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
return -1;
break;
case OR:
/* expanded during renewthread; just a place holder */
break;
case END: /* Match! */
match = 1;
tlp->se.m[0].e.rep = s;
if(mp != 0)
_renewmatch(mp, ms, &tlp->se);
break;
}
break;
}
}
if(s == j->reol)
break;
checkstart = j->starttype && nl->inst==0;
s++;
}while(r);
return match;
}
static int
rregexec2(Reprog *progp, /* program to run */
Rune *bol, /* string to run machine on */
Resub *mp, /* subexpression elements */
int ms, /* number of elements at mp */
Reljunk *j
)
{
int rv;
Relist *relist0, *relist1;
relist0 = malloc((progp->proglen+1)*sizeof(Relist));
if(relist0 == nil)
return -1;
relist1 = malloc((progp->proglen+1)*sizeof(Relist));
if(relist1 == nil){
free(relist1);
return -1;
}
j->relist[0] = relist0;
j->relist[1] = relist1;
j->reliste[0] = relist0 + progp->proglen;
j->reliste[1] = relist1 + progp->proglen;
rv = rregexec1(progp, bol, mp, ms, j);
free(relist0);
free(relist1);
return rv;
}
extern int
rregexec(Reprog *progp, /* program to run */
Rune *bol, /* string to run machine on */
Resub *mp, /* subexpression elements */
int ms) /* number of elements at mp */
{
Reljunk j;
Relist relist0[LISTSIZE], relist1[LISTSIZE];
int rv;
/*
* use user-specified starting/ending location if specified
*/
j.rstarts = bol;
j.reol = 0;
if(mp && ms>0){
if(mp->s.sp)
j.rstarts = mp->s.rsp;
if(mp->e.ep)
j.reol = mp->e.rep;
}
j.starttype = 0;
j.startchar = 0;
if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
j.starttype = RUNE;
j.startchar = progp->startinst->u1.r;
}
if(progp->startinst->type == BOL)
j.starttype = BOL;
/* mark space */
j.relist[0] = relist0;
j.relist[1] = relist1;
j.reliste[0] = relist0 + nelem(relist0) - 1;
j.reliste[1] = relist1 + nelem(relist1) - 1;
rv = rregexec1(progp, bol, mp, ms, &j);
if(rv >= 0)
return rv;
rv = rregexec2(progp, bol, mp, ms, &j);
if(rv >= 0)
return rv;
return -1;
}