mirror of https://github.com/tildeclub/ex-vi.git
1071 lines
22 KiB
C
1071 lines
22 KiB
C
/*
|
|
* Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
|
|
*
|
|
* Sccsid @(#)regnfa.c 1.8 (gritter) 2/6/05
|
|
*/
|
|
/* UNIX(R) Regular Expresssion Library
|
|
*
|
|
* Note: Code is released under the GNU LGPL
|
|
*
|
|
* Copyright (C) 2001 Caldera International, Inc.
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to:
|
|
* Free Software Foundation, Inc.
|
|
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*/
|
|
|
|
/* #include "synonyms.h" */
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include "re.h"
|
|
#include <stddef.h>
|
|
#include <ctype.h>
|
|
|
|
typedef unsigned char Uchar;
|
|
typedef unsigned short Ushort;
|
|
|
|
/*
|
|
* Nondeterministic Finite Automata.
|
|
*/
|
|
typedef struct t_graph Graph;
|
|
struct t_graph
|
|
{
|
|
union
|
|
{
|
|
Graph *ptr;
|
|
Info info;
|
|
} alt;
|
|
Graph *next;
|
|
w_type op;
|
|
};
|
|
|
|
typedef struct t_stack Stack;
|
|
struct t_stack
|
|
{
|
|
Stack *link; /* simplifies cleanup */
|
|
Stack *prev; /* covered states */
|
|
Graph *wasgp; /* node associated with this state */
|
|
const Uchar *str; /* saved position in the string */
|
|
Ushort cnt; /* ROP_BRACE: traversal count */
|
|
};
|
|
|
|
/*
|
|
* A Context holds all the information needed for each
|
|
* potential path through the NFA graph.
|
|
*/
|
|
typedef struct t_ctxt Context;
|
|
struct t_ctxt
|
|
{
|
|
Context *link; /* simplifies cleanup */
|
|
Context *next; /* singly linked */
|
|
Stack *sp; /* nested counts */
|
|
Graph *gp; /* starting node */
|
|
Graph *wasgp; /* node associated with this state */
|
|
const Uchar *str; /* saved position in the string */
|
|
Ushort cnt; /* ROP_BRACE: traversal count */
|
|
size_t nset; /* length of rm[] that is currently set */
|
|
regmatch_t rm[1]; /* enough to cover re_nsub+1 (np->rmlen) */
|
|
};
|
|
|
|
struct re_nfa_ /*Nfa*/
|
|
{
|
|
Graph *gp; /* entire NFA */
|
|
Stack *sp; /* unused Stacks */
|
|
Stack *allsp; /* linked Stacks (for cleanup) */
|
|
Context *allcp; /* linked Contexts (for cleanup) */
|
|
Context *cur; /* Contexts to be continued now */
|
|
Context *step; /* Contexts waiting for a step of the NFA */
|
|
Context *avail; /* unused Contexts */
|
|
Context **ecur; /* ends cur list of Contexts */
|
|
Context **estp; /* ends step list of Contexts */
|
|
size_t rmlen; /* length of rm[] in each Context */
|
|
size_t rmmin; /* minimum length needed */
|
|
size_t used; /* length used for this libuxre_regnfaexec() */
|
|
w_type beg; /* nonzero for fixed char initial node NFAs */
|
|
};
|
|
|
|
#define ROP_MTOR ROP_CAT /* ROP_OR, except might be empty loop */
|
|
|
|
/*
|
|
* Depth first traversal.
|
|
* Make a singly linked list (in alt.ptr) of the graph's nodes.
|
|
* Must toss any ROP_BKTs, too, since "alt" is overwritten.
|
|
*/
|
|
static void
|
|
deltolist(Graph *gp, Graph **list)
|
|
{
|
|
Graph *ptr;
|
|
|
|
if ((ptr = gp->next) != 0) /* first time */
|
|
{
|
|
gp->next = 0;
|
|
if (gp->op == ROP_OR || gp->op == ROP_MTOR)
|
|
deltolist(gp->alt.ptr, list);
|
|
deltolist(ptr, list);
|
|
if (gp->op == ROP_BKT)
|
|
{
|
|
libuxre_bktfree(gp->alt.info.bkt);
|
|
free(gp->alt.info.bkt);
|
|
}
|
|
}
|
|
else if (gp->op == ROP_END)
|
|
gp->op = ROP_NOP;
|
|
else
|
|
return;
|
|
gp->alt.ptr = *list;
|
|
*list = gp;
|
|
}
|
|
|
|
/*
|
|
* After the list is turned into a linked list,
|
|
* walk that list freeing the nodes.
|
|
*/
|
|
static void
|
|
delgraph(Graph *gp)
|
|
{
|
|
Graph *gp2, end;
|
|
|
|
gp2 = &end;
|
|
deltolist(gp, &gp2);
|
|
while ((gp = gp2) != &end)
|
|
{
|
|
gp2 = gp->alt.ptr;
|
|
free(gp);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Depth first traversal.
|
|
* Look for ROP_NOPs and prune them from the graph.
|
|
* Chain them all together on *nop's list.
|
|
*/
|
|
static Graph *
|
|
nopskip(Graph *gp, Graph **nop)
|
|
{
|
|
Graph *ptr;
|
|
|
|
if ((ptr = gp->next) != 0) /* might have yet to do this subgraph */
|
|
{
|
|
if (gp->op == ROP_NOP)
|
|
{
|
|
if (gp->alt.ptr != 0) /* touched */
|
|
return gp->next; /* already did it */
|
|
gp->alt.ptr = *nop;
|
|
*nop = gp;
|
|
}
|
|
gp->next = 0; /* this subgraph's pending */
|
|
if (gp->op == ROP_OR || gp->op == ROP_MTOR)
|
|
gp->alt.ptr = nopskip(gp->alt.ptr, nop);
|
|
gp->next = nopskip(ptr, nop);
|
|
if (gp->op == ROP_NOP)
|
|
return gp->next;
|
|
}
|
|
return gp;
|
|
}
|
|
|
|
/*
|
|
* Postorder traversal of the parse tree.
|
|
* Build a graph using "Thompson's" algorithm.
|
|
* The only significant modification is the
|
|
* ROP_BRACE->ROP_MTOR construction.
|
|
* Returns 1 => graph might match empty
|
|
* 0 => graph cannot match empty
|
|
* -1 => error (in allocation)
|
|
*/
|
|
static int
|
|
mkgraph(Tree *tp, Graph **first, Graph **last)
|
|
{
|
|
Graph *new = 0, *nop, *lf, *ll, *rf, *rl;
|
|
int lmt, rmt = 0;
|
|
|
|
if (tp->op != ROP_CAT)
|
|
{
|
|
if ((new = malloc(sizeof(Graph))) == 0)
|
|
return 0;
|
|
new->op = tp->op; /* usually */
|
|
}
|
|
switch (tp->op)
|
|
{
|
|
case ROP_REF:
|
|
new->alt.info.sub = tp->right.info.sub;
|
|
*first = new;
|
|
*last = new;
|
|
return 1; /* safe--can't really tell */
|
|
case ROP_BKT:
|
|
tp->op = ROP_BKTCOPY; /* now graph owns clean up */
|
|
/*FALLTHROUGH*/
|
|
case ROP_BKTCOPY:
|
|
new->alt.info.bkt = tp->right.info.bkt;
|
|
/*FALLTHROUGH*/
|
|
default:
|
|
*first = new;
|
|
*last = new;
|
|
return 0;
|
|
case ROP_EMPTY:
|
|
new->op = ROP_NOP;
|
|
new->alt.ptr = 0; /* untouched */
|
|
*first = new;
|
|
*last = new;
|
|
return 1;
|
|
case ROP_OR:
|
|
case ROP_CAT:
|
|
lf = 0; /* in case of error */
|
|
if ((rmt = mkgraph(tp->right.ptr, &rf, &rl)) < 0)
|
|
goto err;
|
|
/*FALLTHROUGH*/
|
|
case ROP_STAR:
|
|
case ROP_PLUS:
|
|
case ROP_QUEST:
|
|
case ROP_BRACE:
|
|
case ROP_LP:
|
|
if ((lmt = mkgraph(tp->left.ptr, &lf, &ll)) < 0)
|
|
goto err;
|
|
break;
|
|
}
|
|
/*
|
|
* Note that ROP_NOP only serves as the node that reconnects
|
|
* the two choices of an incoming ROP_OR or ROP_QUEST. To
|
|
* prevent rewalking portions of the graph in nopskip(),
|
|
* this code marks all ROP_NOP nodes as currently untouched.
|
|
*/
|
|
switch (tp->op)
|
|
{
|
|
case ROP_OR:
|
|
if ((nop = malloc(sizeof(Graph))) == 0)
|
|
goto err;
|
|
nop->op = ROP_NOP;
|
|
nop->alt.ptr = 0; /* untouched */
|
|
ll->next = nop;
|
|
rl->next = nop;
|
|
new->next = lf;
|
|
new->alt.ptr = rf;
|
|
*first = new;
|
|
*last = nop;
|
|
return lmt | rmt;
|
|
case ROP_CAT: /* no "new" */
|
|
ll->next = rf;
|
|
*first = lf;
|
|
*last = rl;
|
|
return lmt & rmt;
|
|
case ROP_QUEST:
|
|
if ((nop = malloc(sizeof(Graph))) == 0)
|
|
goto err;
|
|
nop->op = ROP_NOP;
|
|
nop->alt.ptr = 0; /* untouched */
|
|
new->op = ROP_OR;
|
|
new->next = lf;
|
|
new->alt.ptr = nop;
|
|
ll->next = nop;
|
|
*first = new;
|
|
*last = nop;
|
|
return 1;
|
|
case ROP_STAR:
|
|
*first = new;
|
|
rmt = 1;
|
|
star:;
|
|
new->op = lmt ? ROP_MTOR : ROP_OR;
|
|
new->alt.ptr = lf;
|
|
ll->next = new;
|
|
*last = new;
|
|
return rmt;
|
|
case ROP_PLUS:
|
|
*first = lf;
|
|
rmt = lmt;
|
|
goto star;
|
|
case ROP_BRACE:
|
|
if ((nop = malloc(sizeof(Graph))) == 0)
|
|
goto err;
|
|
nop->op = ROP_MTOR; /* going to save state anyway... */
|
|
nop->alt.ptr = lf;
|
|
ll->next = new;
|
|
new->next = nop;
|
|
new->alt.info.num[1] = tp->right.info.num[1];
|
|
if ((new->alt.info.num[0] = tp->right.info.num[0]) == 0)
|
|
{
|
|
lmt = 1;
|
|
*first = new;
|
|
}
|
|
else
|
|
{
|
|
new->alt.info.num[0]--; /* already done 1 */
|
|
if (new->alt.info.num[1] != BRACE_INF)
|
|
new->alt.info.num[1]--; /* likewise */
|
|
*first = lf;
|
|
}
|
|
*last = nop;
|
|
return lmt;
|
|
case ROP_LP:
|
|
if ((nop = malloc(sizeof(Graph))) == 0)
|
|
goto err;
|
|
nop->op = ROP_RP;
|
|
nop->alt.info.sub = tp->right.info.sub;
|
|
new->alt.info.sub = tp->right.info.sub;
|
|
new->next = lf;
|
|
ll->next = nop;
|
|
*first = new;
|
|
*last = nop;
|
|
return lmt;
|
|
}
|
|
err:;
|
|
if (KIND_ROP(tp->op) == BINARY_ROP && rf != 0)
|
|
delgraph(rf);
|
|
if (lf != 0)
|
|
delgraph(lf);
|
|
if (tp->op != ROP_CAT)
|
|
free(new);
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* Semi-preorder traversal.
|
|
* Return zero if there's no simple first character
|
|
* (including the operation ROP_BOL) that must always
|
|
* be at the start of a matching string.
|
|
* This code doesn't attempt to get an answer if the
|
|
* first of the tree many be empty.
|
|
*/
|
|
static w_type
|
|
firstop(Tree *tp)
|
|
{
|
|
w_type op;
|
|
|
|
switch (tp->op)
|
|
{
|
|
case ROP_OR:
|
|
if ((op = firstop(tp->left.ptr)) == 0
|
|
|| op != firstop(tp->right.ptr))
|
|
{
|
|
return 0;
|
|
}
|
|
return op;
|
|
case ROP_BRACE:
|
|
if (tp->right.info.num[0] == 0)
|
|
return 0;
|
|
/*FALLTHROUGH*/
|
|
case ROP_CAT:
|
|
case ROP_PLUS:
|
|
case ROP_LP:
|
|
return firstop(tp->left.ptr);
|
|
default:
|
|
if (tp->op < 0)
|
|
return 0;
|
|
/*FALLTHROUGH*/
|
|
case ROP_BOL:
|
|
return tp->op;
|
|
}
|
|
}
|
|
|
|
void
|
|
libuxre_regdelnfa(Nfa *np)
|
|
{
|
|
Context *cp, *cpn;
|
|
Stack *sp, *spn;
|
|
|
|
if (np->gp != 0)
|
|
delgraph(np->gp);
|
|
for (cp = np->allcp; cp != 0; cp = cpn)
|
|
{
|
|
cpn = cp->link;
|
|
free(cp);
|
|
}
|
|
for (sp = np->allsp; sp != 0; sp = spn)
|
|
{
|
|
spn = sp->link;
|
|
free(sp);
|
|
}
|
|
free(np);
|
|
}
|
|
|
|
LIBUXRE_STATIC int
|
|
libuxre_regnfacomp(regex_t *ep, Tree *tp, Lex *lxp)
|
|
{
|
|
Graph *gp, end;
|
|
Nfa *np;
|
|
|
|
if ((np = malloc(sizeof(Nfa))) == 0)
|
|
goto err;
|
|
np->gp = 0; /* in case of error */
|
|
if (mkgraph(tp, &np->gp, &gp) < 0)
|
|
goto err;
|
|
gp->next = 0; /* nothing follows ROP_END */
|
|
np->rmlen = 0;
|
|
if ((ep->re_flags & REG_NOSUB) == 0)
|
|
np->rmlen = ep->re_nsub + 1;
|
|
np->rmmin = 0;
|
|
if (lxp->maxref != 0 && (np->rmmin = lxp->maxref + 1) > np->rmlen)
|
|
np->rmlen = np->rmmin;
|
|
/*
|
|
* Delete all ROP_NOPs from the graph.
|
|
* nopskip() disconnects them from the graph and
|
|
* links them together through their alt.ptr's.
|
|
*/
|
|
gp = &end;
|
|
np->gp = nopskip(np->gp, &gp);
|
|
while (gp != &end)
|
|
{
|
|
Graph *gp2 = gp;
|
|
|
|
gp = gp->alt.ptr;
|
|
free(gp2);
|
|
}
|
|
np->sp = 0;
|
|
np->allsp = 0;
|
|
np->avail = 0;
|
|
np->allcp = 0;
|
|
ep->re_nfa = np;
|
|
np->beg = firstop(tp);
|
|
return 0;
|
|
err:;
|
|
if (np != 0)
|
|
{
|
|
if (np->gp != 0)
|
|
delgraph(np->gp);
|
|
free(np);
|
|
}
|
|
return REG_ESPACE;
|
|
}
|
|
|
|
static Stack *
|
|
newstck(Nfa *np)
|
|
{
|
|
Stack *sp, **spp;
|
|
int i;
|
|
|
|
if ((sp = np->sp) == 0) /* get more */
|
|
{
|
|
spp = &np->sp;
|
|
i = 4;
|
|
while ((sp = malloc(sizeof(Stack))) != 0)
|
|
{
|
|
sp->link = np->allsp;
|
|
np->allsp = sp;
|
|
*spp = sp;
|
|
spp = &sp->prev;
|
|
if (--i == 0)
|
|
break;
|
|
}
|
|
*spp = 0;
|
|
if ((sp = np->sp) == 0) /* first malloc failed */
|
|
return 0;
|
|
}
|
|
np->sp = sp->prev;
|
|
return sp;
|
|
}
|
|
|
|
static int
|
|
mkstck(Nfa *np, Context *cp, Graph *gp)
|
|
{
|
|
Stack *new, *sp;
|
|
|
|
if (gp == 0) /* copy existing stack tail */
|
|
{
|
|
/*
|
|
* Hoist up top of stack.
|
|
*/
|
|
new = cp->sp;
|
|
cp->wasgp = new->wasgp;
|
|
cp->str = new->str;
|
|
cp->cnt = new->cnt;
|
|
cp->sp = new->prev;
|
|
if ((sp = new->prev) == 0) /* only one below */
|
|
{
|
|
new->prev = np->sp;
|
|
np->sp = new;
|
|
cp->sp = 0;
|
|
return 0;
|
|
}
|
|
for (;;) /* copy the rest; reusing the old top */
|
|
{
|
|
new->wasgp = sp->wasgp;
|
|
new->str = sp->str;
|
|
new->cnt = sp->cnt;
|
|
if ((new->prev = sp->prev) == 0)
|
|
break;
|
|
if ((new->prev = newstck(np)) == 0)
|
|
return REG_ESPACE;
|
|
new = new->prev;
|
|
sp = sp->prev;
|
|
}
|
|
return 0;
|
|
}
|
|
if (cp->wasgp != 0) /* push current down */
|
|
{
|
|
if ((new = newstck(np)) == 0)
|
|
return REG_ESPACE;
|
|
new->prev = cp->sp;
|
|
cp->sp = new;
|
|
new->wasgp = cp->wasgp;
|
|
new->str = cp->str;
|
|
new->cnt = cp->cnt;
|
|
}
|
|
cp->wasgp = gp;
|
|
cp->str = 0;
|
|
cp->cnt = 0;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Allocate a new Context (from np->avail)
|
|
* and add it to the end of the current list.
|
|
*/
|
|
static int
|
|
newctxt(Nfa *np, Context *cp, Graph *gp)
|
|
{
|
|
Context *new;
|
|
size_t n;
|
|
|
|
if ((new = np->avail) == 0) /* need more */
|
|
{
|
|
Context *ncp, **cpp;
|
|
int i;
|
|
|
|
/*
|
|
* Can't easily allocate Contexts in one call because
|
|
* the alignments (given the varying length of rm[])
|
|
* are potentially nontrivial.
|
|
*/
|
|
n = offsetof(Context, rm) + np->rmlen * sizeof(regmatch_t);
|
|
i = 4;
|
|
cpp = &np->avail;
|
|
while ((ncp = malloc(n)) != 0)
|
|
{
|
|
ncp->link = np->allcp;
|
|
np->allcp = ncp;
|
|
*cpp = ncp;
|
|
cpp = &ncp->next;
|
|
if (--i == 0)
|
|
break;
|
|
}
|
|
*cpp = 0;
|
|
if ((new = np->avail) == 0) /* first malloc failed */
|
|
return REG_ESPACE;
|
|
}
|
|
np->avail = new->next;
|
|
new->next = 0;
|
|
new->gp = gp;
|
|
new->sp = 0;
|
|
new->wasgp = 0;
|
|
new->nset = 0;
|
|
if (cp != 0) /* copy existing context information */
|
|
{
|
|
if (cp->sp != 0) /* copy tail of stack */
|
|
{
|
|
new->sp = cp->sp;
|
|
if (mkstck(np, new, 0) != 0)
|
|
return REG_ESPACE;
|
|
}
|
|
new->wasgp = cp->wasgp;
|
|
new->str = cp->str;
|
|
new->cnt = cp->cnt;
|
|
/*
|
|
* Copy any valid subexpression match information
|
|
* from the existing context.
|
|
*/
|
|
if (np->used != 0 && (n = cp->nset) != 0)
|
|
{
|
|
regmatch_t *rmn = new->rm, *rmo = cp->rm;
|
|
|
|
new->nset = n;
|
|
for (;; ++rmn, ++rmo)
|
|
{
|
|
rmn->rm_so = rmo->rm_so;
|
|
rmn->rm_eo = rmo->rm_eo;
|
|
if (--n == 0)
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
/*
|
|
* Append it to the end of the current Context list.
|
|
*/
|
|
*np->ecur = new;
|
|
np->ecur = &new->next;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Compare two byte string sequences for equality.
|
|
* If REG_ICASE, walk through the strings doing
|
|
* caseless comparisons of the wide characters.
|
|
*/
|
|
static int
|
|
casecmp(const Uchar *s, Exec *xp, ssize_t i, ssize_t n, int mb_cur_max)
|
|
{
|
|
const Uchar *p = &xp->str[i];
|
|
const Uchar *end;
|
|
w_type wc1, wc2;
|
|
int k;
|
|
|
|
if (strncmp((char *)s, (char *)p, n) == 0) /* try for exact match */
|
|
return 1;
|
|
if ((xp->flags & REG_ICASE) == 0)
|
|
return 0;
|
|
/*
|
|
* Walk through each testing for a match, ignoring case,
|
|
* of the resulting wide characters.
|
|
* Note that only "s" can run out of characters.
|
|
*/
|
|
end = &p[n];
|
|
do
|
|
{
|
|
if ((wc1 = *s++) == '\0')
|
|
return 0;
|
|
if (!ISONEBYTE(wc1) && (k = libuxre_mb2wc(&wc1, s)) > 0)
|
|
s += k;
|
|
if (!ISONEBYTE(wc2 = *p++) && (k = libuxre_mb2wc(&wc2, p)) > 0)
|
|
p += k;
|
|
if (wc1 != wc2)
|
|
{
|
|
wc1 = to_lower(wc1);
|
|
wc2 = to_lower(wc2);
|
|
if (wc1 != wc2)
|
|
return 0;
|
|
}
|
|
} while (p < end);
|
|
return 1;
|
|
}
|
|
|
|
LIBUXRE_STATIC int
|
|
libuxre_regnfaexec(Nfa *np, Exec *xp)
|
|
{
|
|
const Uchar *s, *s1, *s2;
|
|
Context *cp, *cpn;
|
|
Graph *gp, *brace;
|
|
Stack *sp, *spn;
|
|
ssize_t rmso, len;
|
|
int i, ret, mb_cur_max;
|
|
w_type wc;
|
|
size_t n;
|
|
|
|
ret = 0; /* assume it matches */
|
|
rmso = -1; /* but no match yet */
|
|
np->cur = 0;
|
|
np->step = 0;
|
|
np->ecur = &np->cur;
|
|
np->estp = &np->step;
|
|
if ((np->used = xp->nmatch) < np->rmmin)
|
|
np->used = np->rmmin;
|
|
s1 = 0; /* one char back */
|
|
s = xp->str; /* current high water in string */
|
|
mb_cur_max = xp->mb_cur_max;
|
|
for (;;)
|
|
{
|
|
/*
|
|
* Get next character from string.
|
|
* If the engine proper hasn't started and the engine
|
|
* requires a particular character to start and this
|
|
* character isn't it, try the next one.
|
|
*/
|
|
for (;;)
|
|
{
|
|
s2 = s1;
|
|
s1 = s;
|
|
if (!ISONEBYTE(wc = *s++) &&
|
|
(i = libuxre_mb2wc(&wc, s)) > 0)
|
|
s += i;
|
|
if (np->cur != 0 || np->beg == wc || np->beg == 0)
|
|
break;
|
|
if (np->beg == ROP_BOL)
|
|
{
|
|
if (s2 == 0 && (xp->flags & REG_NOTBOL) == 0)
|
|
break;
|
|
if ((xp->flags & REG_NEWLINE) == 0)
|
|
goto nomatch;
|
|
if (s2 != 0 && *s2 == '\n')
|
|
break;
|
|
}
|
|
if (wc == '\0')
|
|
goto nomatch;
|
|
}
|
|
/*
|
|
* Start the engine by inserting a fresh initial context
|
|
* if there's no known match as yet. (Once some match
|
|
* has been found, the end is near.)
|
|
*/
|
|
if (rmso < 0 && newctxt(np, 0, np->gp) != 0)
|
|
goto err;
|
|
/*
|
|
* Walk the current Contexts list, trying each.
|
|
* "loop" is when a new Context is to be tried,
|
|
* "again" is when the same Context continues,
|
|
* but wc was not yet matched.
|
|
*/
|
|
cp = np->cur;
|
|
loop:;
|
|
gp = cp->gp;
|
|
again:;
|
|
switch (gp->op)
|
|
{
|
|
case ROP_BRACE: /* gp->next->op == ROP_MTOR */
|
|
brace = gp;
|
|
gp = gp->next;
|
|
goto mtor;
|
|
case ROP_MTOR:
|
|
brace = 0;
|
|
mtor:;
|
|
if (cp->wasgp != gp) /* first time */
|
|
{
|
|
if (mkstck(np, cp, gp) != 0)
|
|
goto err;
|
|
}
|
|
else if (cp->str == s) /* spinning */
|
|
goto poptonext;
|
|
cp->str = s;
|
|
if (brace != 0)
|
|
{
|
|
if (cp->cnt >= brace->alt.info.num[1])
|
|
goto poptonext;
|
|
if (++cp->cnt <= brace->alt.info.num[0])
|
|
{
|
|
gp = gp->alt.ptr;
|
|
goto again;
|
|
}
|
|
if (cp->cnt > BRACE_MAX)
|
|
cp->cnt = BRACE_MAX;
|
|
}
|
|
if (newctxt(np, cp, gp->alt.ptr) != 0)
|
|
goto err;
|
|
poptonext:;
|
|
cp->wasgp = 0;
|
|
if ((sp = cp->sp) != 0) /* pop stack */
|
|
{
|
|
cp->sp = sp->prev;
|
|
cp->wasgp = sp->wasgp;
|
|
cp->str = sp->str;
|
|
cp->cnt = sp->cnt;
|
|
sp->prev = np->sp;
|
|
np->sp = sp;
|
|
}
|
|
/*FALLTHROUGH*/
|
|
case ROP_EMPTY:
|
|
tonext:;
|
|
gp = gp->next;
|
|
goto again;
|
|
case ROP_OR:
|
|
if (newctxt(np, cp, gp->alt.ptr) != 0)
|
|
goto err;
|
|
goto tonext;
|
|
case ROP_LP:
|
|
if ((n = gp->alt.info.sub) < np->used)
|
|
{
|
|
size_t k;
|
|
|
|
cp->rm[n].rm_so = s1 - xp->str;
|
|
cp->rm[n].rm_eo = -1;
|
|
/*
|
|
* Mark any skipped subexpressions as
|
|
* failing to participate in the match.
|
|
*/
|
|
if ((k = cp->nset) < n)
|
|
{
|
|
regmatch_t *rmp = &cp->rm[k];
|
|
|
|
for (;; rmp++)
|
|
{
|
|
rmp->rm_so = -1;
|
|
rmp->rm_eo = -1;
|
|
if (++k >= n)
|
|
break;
|
|
}
|
|
}
|
|
cp->nset = n + 1;
|
|
}
|
|
goto tonext;
|
|
case ROP_RP:
|
|
if ((n = gp->alt.info.sub) < np->used)
|
|
cp->rm[n].rm_eo = s1 - xp->str;
|
|
goto tonext;
|
|
case ROP_BOL:
|
|
if (s2 == 0)
|
|
{
|
|
if (xp->flags & REG_NOTBOL)
|
|
goto failed;
|
|
}
|
|
else if ((xp->flags & REG_NEWLINE) == 0 || *s2 != '\n')
|
|
goto failed;
|
|
goto tonext;
|
|
case ROP_EOL:
|
|
if (wc == '\0')
|
|
{
|
|
if (xp->flags & REG_NOTEOL)
|
|
goto failed;
|
|
}
|
|
else if ((xp->flags & REG_NEWLINE) == 0 || wc != '\n')
|
|
goto failed;
|
|
goto tonext;
|
|
default: /* character match */
|
|
if (gp->op != wc)
|
|
{
|
|
if ((xp->flags & REG_ICASE) == 0
|
|
|| gp->op != to_lower(wc))
|
|
{
|
|
goto failed;
|
|
}
|
|
}
|
|
nextwc:;
|
|
cp->gp = gp->next;
|
|
tostep:;
|
|
cpn = cp->next;
|
|
cp->next = 0;
|
|
*np->estp = cp;
|
|
np->estp = &cp->next;
|
|
if ((cp = cpn) == 0)
|
|
break;
|
|
goto loop;
|
|
case ROP_NOTNL:
|
|
if (wc == '\n')
|
|
goto failed;
|
|
/*FALLTHROUGH*/
|
|
case ROP_ANYCH:
|
|
if (wc > '\0')
|
|
goto nextwc;
|
|
/*FALLTHROUGH*/
|
|
case ROP_NONE:
|
|
failed:;
|
|
cpn = cp->next;
|
|
cp->next = np->avail;
|
|
np->avail = cp;
|
|
if ((cp = cpn) == 0)
|
|
break;
|
|
goto loop;
|
|
case ROP_LT:
|
|
if (s2 == 0)
|
|
{
|
|
if (xp->flags & REG_NOTBOL)
|
|
goto failed;
|
|
}
|
|
else
|
|
{
|
|
w_type pwc;
|
|
|
|
if (wc != '_' &&
|
|
!iswalnum(mb_cur_max == 1 ? btowc(wc) : wc))
|
|
goto failed;
|
|
if (!ISONEBYTE(pwc = *s2))
|
|
libuxre_mb2wc(&pwc, &s2[1]);
|
|
if (pwc == '_' ||
|
|
iswalnum(mb_cur_max== 1 ? btowc(pwc) : pwc))
|
|
goto failed;
|
|
}
|
|
goto tonext;
|
|
case ROP_GT:
|
|
if (wc == '_' ||
|
|
iswalnum(mb_cur_max == 1 ? btowc(wc) : wc))
|
|
goto failed;
|
|
goto tonext;
|
|
case ROP_BKT:
|
|
case ROP_BKTCOPY:
|
|
if (cp->wasgp == gp) /* rest of MCCE */
|
|
{
|
|
checkspin:;
|
|
if (s1 >= cp->str) /* got it all */
|
|
goto poptonext;
|
|
goto tostep;
|
|
}
|
|
if ((i = libuxre_bktmbexec(gp->alt.info.bkt, wc, s,
|
|
mb_cur_max)) < 0)
|
|
goto failed;
|
|
if ((n = i) == 0) /* only matched wc */
|
|
goto nextwc;
|
|
spin:;
|
|
if (mkstck(np, cp, gp) != 0)
|
|
goto err;
|
|
cp->gp = gp; /* stay here until reach past s+n */
|
|
cp->str = s + n;
|
|
goto tostep;
|
|
case ROP_REF:
|
|
if (cp->wasgp == gp) /* rest of matched string */
|
|
goto checkspin;
|
|
if ((n = gp->alt.info.sub) >= cp->nset)
|
|
goto failed;
|
|
if ((len = cp->rm[n].rm_eo) < 0)
|
|
goto failed;
|
|
if ((len -= n = cp->rm[n].rm_so) == 0)
|
|
goto tonext;
|
|
if (casecmp(s1, xp, n, len, mb_cur_max) == 0)
|
|
goto failed;
|
|
if ((n = s - s1) >= len)
|
|
goto nextwc;
|
|
n = len - n;
|
|
goto spin;
|
|
case ROP_END: /* success! */
|
|
if (xp->flags & REG_NONEMPTY)
|
|
{
|
|
if (s2 == 0)
|
|
goto failed;
|
|
}
|
|
if (xp->nmatch == 0)
|
|
goto match;
|
|
/*
|
|
* Mark any skipped subexpressions as failing to match.
|
|
*/
|
|
if ((n = cp->nset) < xp->nmatch)
|
|
{
|
|
do
|
|
{
|
|
cp->rm[n].rm_so = -1;
|
|
cp->rm[n].rm_eo = -1;
|
|
} while (++n < xp->nmatch);
|
|
}
|
|
/*
|
|
* Note the left-most match that's longest.
|
|
*/
|
|
n = cp->rm[0].rm_so;
|
|
if (rmso < 0 || n < rmso)
|
|
{
|
|
rmso = n;
|
|
record:;
|
|
memcpy(xp->match, cp->rm,
|
|
xp->nmatch * sizeof(regmatch_t));
|
|
goto failed;
|
|
}
|
|
if (rmso < n || xp->match[0].rm_eo > cp->rm[0].rm_eo)
|
|
goto failed;
|
|
if (xp->match[0].rm_eo < cp->rm[0].rm_eo)
|
|
goto record;
|
|
#if 0 /* maximize the lengths of earlier LP...RPs */
|
|
/*
|
|
* If both are of the same length and start
|
|
* at the same point, choose the one with
|
|
* a "longest submatch from left to right"
|
|
* where an empty string wins over a nonmatch.
|
|
*/
|
|
for (n = 1; n < xp->nmatch; n++)
|
|
{
|
|
ssize_t nlen;
|
|
|
|
/*
|
|
* First, go with the choice that has any
|
|
* match for subexpr n.
|
|
*/
|
|
len = xp->match[n].rm_eo;
|
|
nlen = cp->rm[n].rm_eo;
|
|
if (nlen < 0)
|
|
{
|
|
if (len >= 0)
|
|
break;
|
|
}
|
|
else if (len < 0)
|
|
goto record;
|
|
/*
|
|
* Both have a match; go with the longer.
|
|
*/
|
|
len -= xp->match[n].rm_so;
|
|
nlen -= cp->rm[n].rm_so;
|
|
if (nlen < len)
|
|
break;
|
|
if (nlen > len)
|
|
goto record;
|
|
}
|
|
#else /* take LP and RP as "fence posts" and maximize earlier gaps */
|
|
/*
|
|
* If both are of the same length and start
|
|
* at the same point, choose the one with
|
|
* the larger earlier subpatterns, in which
|
|
* each rm_so and rm_eo serves as a separator.
|
|
*/
|
|
for (n = 1; n < xp->nmatch; n++)
|
|
{
|
|
ssize_t nlen;
|
|
int use;
|
|
|
|
if (xp->flags & REG_AVOIDNULL) {
|
|
/*
|
|
* This is to to satisfy POSIX.1-2001
|
|
* XBD pp. 172-173 ll. 6127-6129, whose
|
|
* translation is "do not match null
|
|
* expressions if there is a choice".
|
|
* See also POSIX.2 interpretation #43
|
|
* in which the question was raised.
|
|
*
|
|
* The first subexpression of "\(x*\)*"
|
|
* must thus match the string "xxx".
|
|
*/
|
|
use = cp->rm[n].rm_eo -
|
|
cp->rm[n].rm_so >=
|
|
xp->match[n].rm_eo -
|
|
xp->match[n].rm_so ||
|
|
xp->match[n].rm_so < 0;
|
|
} else
|
|
use = 1;
|
|
/*
|
|
* Choose the rightmost ROP_LP as that
|
|
* maximizes the gap from before.
|
|
*/
|
|
len = xp->match[n].rm_so;
|
|
nlen = cp->rm[n].rm_so;
|
|
if (len < nlen && use)
|
|
goto record;
|
|
if (len > nlen)
|
|
break;
|
|
/*
|
|
* The ROP_LPs are at the same point:
|
|
* Choose the rightmost ROP_RP.
|
|
*/
|
|
len = xp->match[n].rm_eo;
|
|
nlen = cp->rm[n].rm_eo;
|
|
if (len < nlen && use)
|
|
goto record;
|
|
if (len > nlen)
|
|
break;
|
|
}
|
|
#endif
|
|
goto failed;
|
|
}
|
|
/*
|
|
* Finished the current Context list. If the input string
|
|
* has been entirely scanned, we're done. Otherwise, make
|
|
* the next step list current for the next character.
|
|
* If the next step list was empty and there's an existing
|
|
* match, that's the left-most longest.
|
|
*/
|
|
if (wc == '\0')
|
|
{
|
|
if (rmso >= 0)
|
|
goto match;
|
|
goto nomatch;
|
|
}
|
|
np->ecur = np->estp;
|
|
if ((np->cur = np->step) == 0)
|
|
{
|
|
if (rmso >= 0)
|
|
goto match;
|
|
np->ecur = &np->cur; /* was pointing at step */
|
|
}
|
|
np->step = 0;
|
|
np->estp = &np->step;
|
|
}
|
|
nomatch:;
|
|
ret = REG_NOMATCH;
|
|
match:;
|
|
np->avail = 0;
|
|
for (cp = np->allcp; cp != 0; cp = cpn)
|
|
{
|
|
cpn = cp->link;
|
|
cp->next = np->avail;
|
|
np->avail = cp;
|
|
}
|
|
np->sp = 0;
|
|
for (sp = np->allsp; sp != 0; sp = spn)
|
|
{
|
|
spn = sp->link;
|
|
sp->prev = np->sp;
|
|
np->sp = sp;
|
|
}
|
|
return ret;
|
|
err:;
|
|
ret = REG_ESPACE;
|
|
goto match;
|
|
}
|