diff --git a/Changes b/Changes index fcf55ef..4662a1e 100644 --- a/Changes +++ b/Changes @@ -1,4 +1,5 @@ Release ... +* Traditional regular expressions can now be used with multibyte characters. * If a line began with a tabulator and another tabulator was inserted with the cursor located on the first tabulator, the display was not updated appropriately since the last release (Bugreport by Matthew Fischer). (P) diff --git a/Makefile b/Makefile index cb3a0cb..b5988fd 100644 --- a/Makefile +++ b/Makefile @@ -72,7 +72,7 @@ # # from Makefile 7.13.1.3 (2.11BSD GTE) 1996/10/23 # -# @(#)Makefile 1.47 (gritter) 2/19/05 +# @(#)Makefile 1.49 (gritter) 2/19/05 # # @@ -151,13 +151,15 @@ FEATURES = -DLISPCODE -DCHDIR -DFASTTAG -DUCVISUAL -DMB -DBIT8 #LANGMSG = -DLANGMSG -DCATNAME='"UNKNOWN"' # -# For multibyte character support in regular expressions, and for the -# features of localized regular expressions ([:class:], [.c.], [=c=], -# \(re\)*, \(re\)\{m,n\}), you need Caldera's 'UNIX(R) Regular Expression -# Library' or a derivative of it. Comment out the three following lines if -# you don't have it or if it does not compile; it needs some advanced -# multibyte character support (wchar.h, wctype.h, btowc() etc.) which is -# not provided by older compilation environments. +# For POSIX regular expressions, e.g. the star applied to subexpressions +# as in \(ab\)* and localized regular expressions like [:class:], [.c.], +# and [=c=], you need Caldera's 'UNIX(R) Regular Expression Library' or +# the included derivative of it. +# +# Comment out the three following lines if you do not have it or if it +# does not compile; it needs some advanced multibyte character support +# (wchar.h, wctype.h, btowc() etc.) which is not provided by older +# compilation environments. # REINC = -I./libuxre -DUXRE RELIB = -L./libuxre -luxre diff --git a/TODO b/TODO index 973c7f5..aaf0564 100644 --- a/TODO +++ b/TODO @@ -9,8 +9,4 @@ TODO list for ex - SVr4 ex probably has some silent features that this one should have too. -- The traditional regular expression code in ex_re.c could be updated to - work with multibyte characters. This would mostly involve to take the - code from libcommon/regexp.h of the Heirloom Toolchest. - Gunnar Ritter 2/19/05 diff --git a/ex_proto.h b/ex_proto.h index 6012743..6af8c18 100644 --- a/ex_proto.h +++ b/ex_proto.h @@ -70,7 +70,7 @@ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * @(#)ex_proto.h 1.27 (gritter) 2/15/05 + * @(#)ex_proto.h 1.28 (gritter) 2/19/05 */ /* @@ -245,9 +245,6 @@ extern int compile(int, int); extern int same(register int, register int); extern int ecmp(register char *, register char *, register int); extern int execute(int, line *); -extern void getrnge(register char *); -extern int advance(register char *, register char *); -extern int cclass(register char *, register int, int); /* ex_set.c */ extern void set(void); extern int setend(void); diff --git a/ex_re.c b/ex_re.c index 2f24659..cd33bdc 100644 --- a/ex_re.c +++ b/ex_re.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_re.c 1.46 (gritter) 2/18/05"; +static char sccsid[] = "@(#)ex_re.c 1.47 (gritter) 2/19/05"; #endif #endif @@ -82,6 +82,86 @@ static char sccsid[] = "@(#)ex_re.c 1.46 (gritter) 2/18/05"; #include "ex.h" #include "ex_re.h" +#ifdef UXRE +char *braslist[NBRA]; +char *braelist[NBRA]; +#else /* !UXRE */ +static int regerrno; + +#define INIT register char *sp = instring; +#define GETC() (*sp++) +#define PEEKC() (*sp) +#define UNGETC(c) (--sp) +#define RETURN(c) return (c); +#define ERROR(c) { regerrno = c; return 0; } + +#define compile(a, b, c, d) _compile(a, b, c, d) +#define regexp_h_static static + +#ifndef NO_BE_BACKSLASH +#define REGEXP_H_VI_BACKSLASH +#endif /* !NO_BE_BACKSLASH */ + +#ifdef MB +#define REGEXP_H_WCHARS +#endif /* MB */ + +#define REGEXP_H_USED_FROM_VI + +#include "regexp.h" + +static size_t +loconv(register char *dst, register const char *src) +{ + char *odst = dst; + +#ifdef MB + if (mb_cur_max > 1) { + char mb[MB_LEN_MAX]; + wchar_t wc; + int len, i, nlen; + + while (*src) { + if ((*src & 0200) == 0) { + *dst++ = tolower(*src); + src++; + } else if ((len = mbtowc(&wc, src, mb_cur_max)) <= 0) { + *dst++ = *src++; + } else { + wc = towlower(wc); + if (len >= mb_cur_max) { + if ((nlen = wctomb(dst, wc)) <= len) { + dst += nlen; + src += len; + } else { + *dst++ = *src++; + } + } else { + if ((nlen = wctomb(mb, wc)) <= len) { + src += len; + for (i = 0; i < nlen; i++) + *dst++ = mb[i]; + } else { + *dst++ = *src++; + } + } + } + } + } else +#endif /* MB */ + { + while (*src) { + *dst++ = tolower(*src & 0377); + src++; + } + } + return dst - odst; +} + +#undef compile + +#endif /* !UXRE */ + /* * Global, substitute and regular expressions. * Very similar to ed, with some re extensions and @@ -326,10 +406,10 @@ compsub(int ch) /* fall into ... */ case '&': redo: - if (re.Expbuf[0] == 0) + if (re.Patbuf[0] == 0) error(catgets(catd, 1, 127, "No previous re|No previous regular expression")); - if (subre.Expbuf[0] == 0) + if (subre.Patbuf[0] == 0) error(catgets(catd, 1, 128, "No previous substitute re|No previous substitute to repeat")); break; @@ -643,9 +723,9 @@ dosub(void) continue; } #ifndef BIT8 - if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') { + if (c < 0 && (c &= TRIM) >= '1' && c < re.Nbra + '1') { #else - if (q && c >= '1' && c < nbra + '1') { + if (q && c >= '1' && c < re.Nbra + '1') { #endif sp = place(sp, braslist[c - '1'], braelist[c - '1']); if (sp == 0) @@ -777,11 +857,10 @@ snote(register int total, register int lines) void cerror(char *s) { - expbuf[0] = '\0'; + re.Patbuf[0] = '\0'; error(s); } -#ifdef UXRE void refree(struct regexp *rp) { @@ -801,17 +880,18 @@ refree(struct regexp *rp) } if ((r1->Re_used == 0 || rp->Re_ident != r1->Re_ident) && (r2->Re_used == 0 || rp->Re_ident != r2->Re_ident)) - regfree(&rp->Re); +#ifdef UXRE + regfree(&rp->Expbuf); +#else /* !UXRE */ + free(rp->Expbuf); +#endif /* !UXRE */ rp->Re_used = 0; } -#endif struct regexp * savere(struct regexp *store) { -#ifdef UXRE refree(store); -#endif copy(store, &re, sizeof re); return store; } @@ -819,21 +899,18 @@ savere(struct regexp *store) struct regexp * resre(struct regexp *store) { -#ifdef UXRE refree(&re); -#endif copy(&re, store, sizeof re); return store; } -#ifdef UXRE int compile(int eof, int oknl) { int c, d, i, n; char mb[MB_LEN_MAX+1]; - char *p = re.Expbuf, *end = re.Expbuf + sizeof re.Expbuf; - int err = 0, nomagic = value(MAGIC) ? 0 : 1, esc; + char *p = re.Patbuf, *end = re.Patbuf + sizeof re.Patbuf; + int nomagic = value(MAGIC) ? 0 : 1, esc, rcnt = 0; char *rhsp; #ifdef BIT8 char *rhsq; @@ -847,13 +924,13 @@ compile(int eof, int oknl) switch (c) { case '/': case '?': - if (scanre.Expbuf[0] == 0) + if (scanre.Patbuf[0] == 0) error(catgets(catd, 1, 134, "No previous scan re|No previous scanning regular expression")); resre(&scanre); return c; case '&': - if (subre.Expbuf[0] == 0) + if (subre.Patbuf[0] == 0) error(catgets(catd, 1, 135, "No previous substitute re|No previous substitute regular expression")); resre(&subre); @@ -874,9 +951,9 @@ compile(int eof, int oknl) "No previous re|No previous regular expression")); return eof; } - nbra = circfl = 0; + re.Nbra = re.Circfl = 0; if (c == '^') - circfl++; + re.Circfl++; esc = 0; goto havec; /* @@ -963,6 +1040,7 @@ compile(int eof, int oknl) #endif } } else if (!esc && c == '[') { + rcnt++; /* * Search for the end of the bracket expression * since '~' may not be recognized inside. @@ -980,6 +1058,7 @@ compile(int eof, int oknl) if (p >= end) goto complex; } +#ifdef UXRE if (d == '[' && (c == ':' || c == '.' || c == '=')) { d = c; @@ -1001,6 +1080,7 @@ compile(int eof, int oknl) } c = EOF; /* -> reset d and continue */ } +#endif /* UXRE */ d = c; } while (c != ']'); } else if (esc && c == '{') { @@ -1040,33 +1120,94 @@ compile(int eof, int oknl) complex: cerror(catgets(catd, 1, 139, "Re too complex|Regular expression too complicated")); } - if (p == expbuf) + if (p == re.Patbuf) *p++ = '.'; /* approximate historical behavior */ *p = '\0'; refree(&re); +#ifdef UXRE c = REG_ANGLES | REG_BADRANGE; #ifndef NO_BE_BACKSLASH c |= REG_BKTESCAPE; #endif /* !NO_BE_BACKSLASH */ if (value(IGNORECASE)) c |= REG_ICASE; - if ((err = regcomp(&re.Re, re.Expbuf, c)) != 0) { - switch (err) { + if ((i = regcomp(&re.Expbuf, re.Patbuf, c)) != 0) { + switch (i) { case REG_EBRACK: miss: cerror(catgets(catd, 1, 154, "Missing ]")); + /*NOTREACHED*/ + break; default: - regerror(err, &re.Re, &re.Expbuf[1], - sizeof re.Expbuf - 1); - cerror(&re.Expbuf[1]); + regerror(i, &re.Expbuf, &re.Patbuf[1], + sizeof re.Patbuf - 1); + cerror(&re.Patbuf[1]); } } + if ((re.Nbra = re.Expbuf.re_nsub) > NBRA) + re.Nbra = NBRA; +#else /* !UXRE */ + if ((re.Expbuf = malloc(n = rcnt*32 + 2*(p-re.Patbuf) + 5)) == NULL) + goto complex; + if (value(IGNORECASE)) + loconv(re.Patbuf, re.Patbuf); + if (_compile(re.Patbuf, re.Expbuf, &re.Expbuf[n], '\0') == 0) { + char *cp; + free(re.Expbuf); + switch (regerrno) { + case 11: + cp = "Range endpoint too large|Range endpoint " + "too large in regular expression"; + break; + case 16: + cp = "Bad number|Bad number in regular expression"; + break; + case 25: + cp = "\"\\digit\" out of range"; + break; + case 36: + cp = "Badly formed re|Missing closing delimiter " + "for regular expression"; + break; + case 41: + cp = "No remembered search string."; + break; + case 42: + cp = "Unmatched \\( or \\)|More \\('s than \\)'s in " + "regular expression or vice-versa"; + break; + case 43: + cp = "Awash in \\('s!|Too many \\('d subexressions " + "in a regular expression"; + break; + case 44: + cp = "More than 2 numbers given in \\{~\\}"; + break; + case 45: + cp = "} expected after \\"; + break; + case 46: + cp = "First number exceeds second in \\{~\\}"; + break; + case 49: + miss: cp = "Missing ]"; + break; + case 67: + cp = "Illegal byte sequence."; + break; + default: + cp = "Unknown regexp error code!!"; + } + cerror(cp); + } + re.Circfl = circf; + re.Nbra = nbra; +#endif /* !UXRE */ re.Re_used = 1; re.Re_ident++; - if ((nbra = re.Re.re_nsub) > NBRA) - nbra = NBRA; return eof; } +#ifdef UXRE int execute(int gf, line *addr) { @@ -1076,7 +1217,7 @@ execute(int gf, line *addr) regmatch_t bralist[NBRA + 1]; if (gf) { - if (circfl) + if (re.Circfl) return 0; eflags |= REG_NOTBOL; p = loc2; @@ -1091,7 +1232,7 @@ execute(int gf, line *addr) * so don't fetch them otherwise (enables use of DFA). */ nsub = (re.Re_ident == subre.Re_ident ? NBRA : 0); - switch (regexec(&re.Re, p, nsub + 1, bralist, eflags)) { + switch (regexec(&re.Expbuf, p, nsub + 1, bralist, eflags)) { case 0: break; case REG_NOMATCH: @@ -1112,620 +1253,24 @@ execute(int gf, line *addr) return 1; } #else /* !UXRE */ -#define INSCHAR(c) { \ - if ((c) == '\n' || (c) == EOF) \ - cerror(catgets(catd, 1, 154, \ - "Missing ]")); \ - *ep++ = (c); \ - cclcnt++; \ - if (ep >= &expbuf[ESIZE]) \ - goto complex; \ - } - -int -compile(int eof, int oknl) -{ - register int c; - register char *ep; -#ifdef BIT8 -#ifndef NO_BE_BACKSLASH - bool haddash; -#endif /* !NO_BE_BACKSLASH */ -#endif /* BIT8 */ - char *lastep = NULL; - char bracket[NBRA], *bracketp, *rhsp; -#ifdef BIT8 - char *rhsq; -#endif - int cclcnt; - int i, cflg, closed; - - if (isalpha(eof) || isdigit(eof)) - error(catgets(catd, 1, 133, - "Regular expressions cannot be delimited by letters or digits")); - ep = expbuf; - c = getchar(); - if (eof == '\\') - switch (c) { - - case '/': - case '?': - if (scanre.Expbuf[0] == 0) - error(catgets(catd, 1, 134, - "No previous scan re|No previous scanning regular expression")); - resre(&scanre); - return (c); - - case '&': - if (subre.Expbuf[0] == 0) - error(catgets(catd, 1, 135, - "No previous substitute re|No previous substitute regular expression")); - resre(&subre); - return (c); - - default: - error(catgets(catd, 1, 136, - "Badly formed re|Regular expression \\ must be followed by / or ?")); - } - if (c == eof || c == '\n' || c == EOF) { - if (*ep == 0) - error(catgets(catd, 1, 137, - "No previous re|No previous regular expression")); - if (c == '\n' && oknl == 0) - error(catgets(catd, 1, 138, - "Missing closing delimiter@for regular expression")); - if (c != eof) - ungetchar(c); - return (eof); - } - bracketp = bracket; - nbra = 0; - circfl = 0; - closed = 0; - if (c == '^') { - c = getchar(); - circfl++; - } - ungetchar(c); - for (;;) { - if (ep >= &expbuf[ESIZE - 2]) -complex: - cerror(catgets(catd, 1, 139, - "Re too complex|Regular expression too complicated")); - c = getchar(); - if (c == eof || c == EOF) { - if (bracketp != bracket) - cerror(catgets(catd, 1, 140, - "Unmatched \\(|More \\('s than \\)'s in regular expression")); - *ep++ = CEOFC; - if (c == EOF) - ungetchar(c); - return (eof); - } - if (value(MAGIC)) { - if (c != '*' && (c != '\\' || peekchar() != '{') || - ep == expbuf) { - lastep = ep; - } - } else - if (c != '\\' || peekchar() != '*' || ep == expbuf) { - lastep = ep; - } - switch (c) { - - case '\\': - c = getchar(); - switch (c) { - - case '(': - if (nbra >= NBRA) - cerror(catgets(catd, 1, 141, -"Awash in \\('s!|Too many \\('d subexressions in a regular expression")); - *bracketp++ = nbra; - *ep++ = CBRA; - *ep++ = nbra++; - continue; - - case ')': - if (bracketp <= bracket) - cerror(catgets(catd, 1, 142, - "Extra \\)|More \\)'s than \\('s in regular expression")); - *ep++ = CKET; - *ep++ = *--bracketp; - closed++; - continue; - - case '<': - *ep++ = CBRC; - continue; - - case '>': - *ep++ = CLET; - continue; - case '{': - if (lastep == (char *)0) - goto defchar; - *lastep |= RNGE; - cflg = 0; -nlim: - c = getchar(); - i = 0; - do { - if ('0' <= c && c <= '9') - i = 10 * i + c - '0'; - else - cerror(catgets(catd, 1, 143, - "Bad number|Bad number in regular expression")); - } while ((c = getchar()) != '\\' && c != ','); - if (i > 255) - cerror(catgets(catd, 1, 144, -"Range endpoint too large|Range endpoint too large in regular expression")); - *ep++ = i; - if (c == ',') { - if (cflg++) - cerror(catgets(catd, 1, 145, - "More than 2 numbers given in \\{~\\}")); - if ((c = getchar()) == '\\') { - *ep++ = 255; - } else { - ungetchar(c); - goto nlim; - } - } - if (getchar() != '}') - cerror(catgets(catd, 1, 146, - "} expected after \\")); - if (!cflg) { - *ep++ = i; - } - else if ((ep[-1] & 0377) < (ep[-2] & 0377)) - cerror(catgets(catd, 1, 147, - "First number exceeds second in \\{~\\}")); - continue; - default: - if (c >= '1' && c <= '9') { - if ((c -= '1') >= closed) - cerror(catgets(catd, 1, 148, - "\"\\digit\" out of range")); - *ep++ = CBACK; - *ep++ = c; - continue; - } - } - if (value(MAGIC) == 0) -magic: - switch (c) { - - case '.': - *ep++ = CDOT; - continue; - - case '~': - rhsp = rhsbuf; -#ifdef BIT8 - rhsq = rhsquo; -#endif - while (*rhsp) { -#ifndef BIT8 - if (*rhsp & QUOTE) { - c = *rhsp & TRIM; -#else - if (*rhsq) { - c = *rhsp; -#endif - if (c == '&') - error(catgets(catd, 1, - 149, "Replacement pattern contains &@- cannot use in re")); - if (c >= '1' && c <= '9') - error(catgets(catd, 1, - 150, "Replacement pattern contains \\d@- cannot use in re")); - } - if (ep >= &expbuf[ESIZE-2]) - goto complex; - *ep++ = CCHR; -#ifndef BIT8 - *ep++ = *rhsp++ & TRIM; -#else - *ep++ = *rhsp++; - rhsq++; -#endif - } - continue; - - case '*': - if (ep == expbuf) - break; - if (*lastep == CBRA || *lastep == CKET) - cerror(catgets(catd, 1, 151, - "Illegal *|Can't * a \\( ... \\) in regular expression")); -#ifndef BIT8 - if (*lastep == CCHR && (lastep[1] & QUOTE)) - cerror(catgets(catd, 1, 152, - "Illegal *|Can't * a \\n in regular expression")); -#endif - *lastep |= STAR; - continue; - - case '[': - *ep++ = CCL; - *ep++ = 0; -#ifdef BIT8 -#ifndef NO_BE_BACKSLASH - haddash = 0; -#endif /* !NO_BE_BACKSLASH */ -#endif /* BIT8 */ - cclcnt = 1; - c = getchar(); - if (c == '^') { - c = getchar(); - ep[-2] = NCCL; - } -#ifndef NO_BE_BACKSLASH - if (c == ']') - cerror(catgets(catd, 1, 153, -"Bad character class|Empty character class '[]' or '[^]' cannot match")); - while (c != ']') { - if (c == '\\' && any(peekchar(), "]-^\\")) { -#ifndef BIT8 - c = getchar() | QUOTE; -#else /* BIT8 */ - if ((c = getchar()) == '-') { - haddash = 1; - c = getchar(); - } -#endif /* BIT8 */ - } - INSCHAR(c) - c = getchar(); - } -#ifdef BIT8 - if (haddash) - INSCHAR('-') -#endif /* BIT8 */ -#else /* NO_BE_BACKSLASH */ - /* - * There is no escape character inside a - * bracket expression. Characters lose their - * special meaning by position only. - */ - do - INSCHAR(c) - while ((c = getchar()) != ']'); -#endif /* NO_BE_BACKSLASH */ - lastep[1] = cclcnt; - continue; - } - if (c == EOF) { - ungetchar(EOF); - c = '\\'; - goto defchar; - } - *ep++ = CCHR; - if (c == '\n') - cerror(catgets(catd, 1, 155, - "No newlines in re's|Can't escape newlines into regular expressions")); -/* - if (c < '1' || c > NBRA + '1') { -*/ - *ep++ = c; - continue; -/* - } - c -= '1'; - if (c >= nbra) - cerror(catgets(catd, 1, 156, -"Bad \\n|\\n in regular expression with n greater than the number of \\('s")); - *ep++ = c | QUOTE; - continue; -*/ - - case '\n': - if (oknl) { - ungetchar(c); - *ep++ = CEOFC; - return (eof); - } - cerror(catgets(catd, 1, 157, - "Badly formed re|Missing closing delimiter for regular expression")); - - case '$': - if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') { - *ep++ = CDOL; - continue; - } - goto defchar; - - case '.': - case '~': - case '*': - case '[': - if (value(MAGIC)) - goto magic; -defchar: - default: - *ep++ = CCHR; - *ep++ = c; - continue; - } - } -} - -int -same(register int a, register int b) -{ - - return (a == b || value(IGNORECASE) && - ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a))); -} - -int -ecmp(register char *a, register char *b, register int count) -{ - while (count--) - if (!same(*a++, *b++)) - return (0); - return (1); -} - -char *locs; - int execute(int gf, line *addr) { - register char *p1, *p2; - register int c; + char *p; if (gf) { - if (circfl) - return (0); - locs = p1 = loc2; + if (re.Circfl) + return 0; + p = locs = loc2; } else { if (addr == zero) - return (0); - p1 = linebuf; + return 0; + p = linebuf; getline(*addr); - locs = 0; + if (value(IGNORECASE)) + loconv(linebuf, linebuf); } - p2 = expbuf; - if (circfl) { - loc1 = p1; - return (advance(p1, p2)); - } - /* fast check for first character */ - if (*p2 == CCHR) { - c = p2[1]; - do { - if (c != *p1 && (!value(IGNORECASE) || - !((islower(c) && toupper(c) == *p1) || - (islower(*p1&0377) && toupper(*p1&0377) == c)))) - continue; - if (advance(p1, p2)) { - loc1 = p1; - return (1); - } - } while (*p1++); - return (0); - } - /* regular algorithm */ - do { - if (advance(p1, p2)) { - loc1 = p1; - return (1); - } - } while (*p1++); - return (0); -} - -void -getrnge(register char *str) -{ - low = *str++ & 0377; - siz = (*str & 0377) == 255 ? 20000 : (*str & 0377) - low; -} - -#define uletter(c) (isalpha(c) || c == '_') - -int -advance(register char *lp, register char *ep) -{ - register char *curlp; - /* char *sp, *sp1; */ - int c, ct; - char *bbeg; - - for (;;) switch (*ep++) { - - case CCHR: -/* useless - if (*ep & QUOTE) { - c = *ep++ & TRIM; - sp = braslist[c]; - sp1 = braelist[c]; - while (sp < sp1) { - if (!same(*sp, *lp)) - return (0); - sp++, lp++; - } - continue; - } -*/ - if (!same(*ep, *lp)) - return (0); - ep++, lp++; - continue; - - case CDOT: - if (*lp++) - continue; - return (0); - - case CDOL: - if (*lp == 0) - continue; - return (0); - - case CEOFC: - loc2 = lp; - return (1); - - case CCL: - if (cclass(ep, *lp++, 1)) { - ep += *ep; - continue; - } - return (0); - - case NCCL: - if (cclass(ep, *lp++, 0)) { - ep += *ep; - continue; - } - return (0); - - case CBRA: - braslist[(int)*ep++] = lp; - continue; - - case CKET: - braelist[(int)*ep++] = lp; - continue; - - case CCHR|RNGE: - c = *ep++; - getrnge(ep); - while (low--) - if (!same(*lp++, c)) - return (0); - curlp = lp; - while (siz--) - if (!same(*lp++, c)) - break; - if (siz < 0) - lp++; - ep += 2; - goto star; - - case CDOT|RNGE: - getrnge(ep); - while (low--) - if (*lp++ == '\0') - return (0); - curlp = lp; - while (siz--) - if (*lp++ == '\0') - break; - if (siz < 0) - lp++; - ep += 2; - goto star; - - case CCL|RNGE: - case NCCL|RNGE: - getrnge(ep + *ep); - while (low--) { - if (!cclass(ep, *lp++, ep[-1] == (CCL|RNGE))) - return (0); - } - curlp = lp; - while (siz--) { - if (!cclass(ep, *lp++, ep[-1] == (CCL|RNGE))) - break; - } - if (siz < 0) - lp++; - ep += *ep + 2; - goto star; - - case CBACK: - bbeg = braslist[*ep & 0377]; - ct = braelist[*ep++ & 0377] - bbeg; - if (ecmp(bbeg, lp, ct)) { - lp += ct; - continue; - } - return (0); - - case CBACK|STAR: - bbeg = braslist[*ep & 0377]; - ct = braelist[*ep++ & 0377] - bbeg; - curlp = lp; - while (ecmp(bbeg, lp, ct)) - lp += ct; - while (lp >= curlp) { - if (advance(lp, ep)) - return (1); - lp -= ct; - } - return (0); - - case CDOT|STAR: - curlp = lp; - while (*lp++) - continue; - goto star; - - case CCHR|STAR: - curlp = lp; - while (same(*lp, *ep)) - lp++; - lp++; - ep++; - goto star; - - case CCL|STAR: - case NCCL|STAR: - curlp = lp; - while (cclass(ep, *lp++, ep[-1] == (CCL|STAR))) - continue; - ep += *ep; - goto star; -star: - do { - lp--; - if (lp == locs) - break; - if (advance(lp, ep)) - return (1); - } while (lp > curlp); - return (0); - - case CBRC: - if (lp == linebuf) - continue; - if ((isdigit(*lp&0377) || uletter(*lp&0377)) - && !uletter(lp[-1]&0377) && !isdigit(lp[-1]&0377)) - continue; - return (0); - - case CLET: - if (!uletter(*lp&0377) && !isdigit(*lp&0377)) - continue; - return (0); - - default: - error(catgets(catd, 1, 158, "Re internal error")); - } -} - -int -cclass(register char *set, register int c, int af) -{ - register int n; - - if (c == 0) - return (0); - if (value(IGNORECASE) && isupper(c)) - c = tolower(c); - n = *set++; - while (--n) - if (n > 2 && set[1] == '-') { - if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM)) - return (af); - set += 3; - n -= 2; - } else - if ((*set++ & TRIM) == c) - return (af); - return (!af); + circf = re.Circfl; + return step(p, re.Expbuf); } #endif /* !UXRE */ diff --git a/ex_re.h b/ex_re.h index 8d52a10..b3c729d 100644 --- a/ex_re.h +++ b/ex_re.h @@ -72,7 +72,7 @@ * * from ex_re.h 7.3 (Berkeley) 5/31/85 * - * @(#)ex_re.h 1.18 (gritter) 11/23/04 + * @(#)ex_re.h 1.19 (gritter) 2/19/05 */ #ifdef UXRE @@ -89,15 +89,13 @@ * more and alternation.) */ struct regexp { -#ifdef UXRE - char Expbuf[2*LBSIZE + 1]; - regex_t Re; + char Patbuf[2*LBSIZE + 1]; long Re_ident; bool Re_used; +#ifdef UXRE + regex_t Expbuf; #else /* !UXRE */ - char Expbuf[ESIZE + 2]; - int Low; - int Siz; + char *Expbuf; #endif /* !UXRE */ bool Circfl; short Nbra; @@ -113,18 +111,8 @@ var struct regexp re; /* Last re */ var struct regexp scanre; /* Last scanning re */ var struct regexp subre; /* Last substitute re */ -/* - * Defining circfl and expbuf like this saves us from having to change - * old code in the ex_re.c stuff. - */ -#define expbuf re.Expbuf -#define circfl re.Circfl -#define nbra re.Nbra -#define low re.Low -#define siz re.Siz - -var char *loc1; /* Where re began to match (in linebuf) */ -var char *loc2; /* First char after re match (") */ +extern char *loc1; /* Where re began to match (in linebuf) */ +extern char *loc2; /* First char after re match (") */ /* * Since the phototypesetter v7-epsilon @@ -136,30 +124,9 @@ extern struct regexp *resre(struct regexp *); /* * Definitions for substitute */ -var char *braslist[NBRA]; /* Starts of \(\)'ed text in lhs */ -var char *braelist[NBRA]; /* Ends... */ +extern char *braslist[NBRA]; /* Starts of \(\)'ed text in lhs */ +extern char *braelist[NBRA]; /* Ends... */ var char rhsbuf[RHSSIZE]; /* Rhs of last substitute */ #ifdef BIT8 var char rhsquo[RHSSIZE]; /* Quote indicator for rhsbuf */ #endif - -/* - * Definitions of codes for the compiled re's. - * The re algorithm is described in a paper - * by K. Thompson in the CACM about 10 years ago - * and is the same as in ed. - */ -#define STAR 1 -#define RNGE 0100 - -#define CBRA 1 -#define CDOT 4 -#define CCL 8 -#define NCCL 12 -#define CDOL 16 -#define CEOFC 17 -#define CKET 18 -#define CCHR 20 -#define CBRC 24 -#define CLET 25 -#define CBACK 36 diff --git a/ex_version.c b/ex_version.c index 60a5e9e..dffd771 100644 --- a/ex_version.c +++ b/ex_version.c @@ -70,12 +70,12 @@ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * Sccsid @(#)ex_version.c 1.126 (gritter) 2/18/05 + * Sccsid @(#)ex_version.c 1.127 (gritter) 2/19/05 */ #include "ex.h" -static char *versionstring = "@(#)Version 4.0 (gritter) 2/18/05"; +static char *versionstring = "@(#)Version 4.0 (gritter) 2/19/05"; void printver(void)