* Traditional regular expressions can now be used with multibyte characters.

2005-02-19 18:01:26 +00:00 · 2005-02-19 18:01:26 +00:00 · 971748839e
parent a7523a2130
commit 971748839e
7 changed files with 204 additions and 696 deletions
--- a/1
+++ b/1
@ -1,4 +1,5 @@
 Release ...
+* Traditional regular expressions can now be used with multibyte characters.
 * If a line began with a tabulator and another tabulator was inserted with
  the cursor located on the first tabulator, the display was not updated
  appropriately since the last release (Bugreport by Matthew Fischer). (P)
--- a/18
+++ b/18
@ -72,7 +72,7 @@
 #
 #	from Makefile	7.13.1.3 (2.11BSD GTE) 1996/10/23
 #
-#	@(#)Makefile	1.47 (gritter) 2/19/05
+#	@(#)Makefile	1.49 (gritter) 2/19/05
 #

 #
@ -151,13 +151,15 @@ FEATURES	= -DLISPCODE -DCHDIR -DFASTTAG -DUCVISUAL -DMB -DBIT8
 #LANGMSG		= -DLANGMSG -DCATNAME='"UNKNOWN"'

 #
-# For multibyte character support in regular expressions, and for the
-# features of localized regular expressions ([:class:], [.c.], [=c=],
-# \(re\)*, \(re\)\{m,n\}), you need Caldera's 'UNIX(R) Regular Expression
-# Library' or a derivative of it. Comment out the three following lines if
-# you don't have it or if it does not compile; it needs some advanced
-# multibyte character support (wchar.h, wctype.h, btowc() etc.) which is
-# not provided by older compilation environments.
+# For POSIX regular expressions, e.g. the star applied to subexpressions
+# as in \(ab\)* and localized regular expressions like [:class:], [.c.],
+# and [=c=], you need Caldera's 'UNIX(R) Regular Expression Library' or
+# the included derivative of it.
+#
+# Comment out the three following lines if you do not have it or if it
+# does not compile; it needs some advanced multibyte character support
+# (wchar.h, wctype.h, btowc() etc.) which is not provided by older
+# compilation environments.
 #
 REINC	= -I./libuxre -DUXRE
 RELIB	= -L./libuxre -luxre
--- a/4
+++ b/4
@ -9,8 +9,4 @@ TODO list for ex

 - SVr4 ex probably has some silent features that this one should have too.

- The traditional regular expression code in ex_re.c could be updated to
-    work with multibyte characters. This would mostly involve to take the
-    code from libcommon/regexp.h of the Heirloom Toolchest.
-
 Gunnar Ritter						2/19/05
--- a/ex_proto.h
+++ b/ex_proto.h
@ -70,7 +70,7 @@
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *	@(#)ex_proto.h	1.27 (gritter) 2/15/05
+ *	@(#)ex_proto.h	1.28 (gritter) 2/19/05
 */

 /*
@ -245,9 +245,6 @@ extern int compile(int, int);
 extern int same(register int, register int);
 extern int ecmp(register char *, register char *, register int);
 extern int execute(int, line *);
-extern void getrnge(register char *);
-extern int advance(register char *, register char *);
-extern int cclass(register char *, register int, int);
 /* ex_set.c */
 extern void set(void);
 extern int setend(void);
--- a/ex_re.c
+++ b/ex_re.c
@ -73,7 +73,7 @@

 #ifndef	lint
 #ifdef	DOSCCS
-static char sccsid[] = "@(#)ex_re.c	1.46 (gritter) 2/18/05";
+static char sccsid[] = "@(#)ex_re.c	1.47 (gritter) 2/19/05";
 #endif
 #endif

@ -82,6 +82,86 @@ static char sccsid[] = "@(#)ex_re.c	1.46 (gritter) 2/18/05";
 #include "ex.h"
 #include "ex_re.h"

+#ifdef	UXRE
+char	*braslist[NBRA];
+char	*braelist[NBRA];
+#else	/* !UXRE */
+static int	regerrno;
+
+#define	INIT			register char *sp = instring;
+#define	GETC()			(*sp++)
+#define	PEEKC()			(*sp)
+#define	UNGETC(c)		(--sp)
+#define	RETURN(c)		return (c);
+#define	ERROR(c)		{ regerrno = c; return 0; }
+
+#define	compile(a, b, c, d)	_compile(a, b, c, d)
+#define	regexp_h_static		static
+
+#ifndef	NO_BE_BACKSLASH
+#define	REGEXP_H_VI_BACKSLASH
+#endif	/* !NO_BE_BACKSLASH */
+
+#ifdef	MB
+#define	REGEXP_H_WCHARS
+#endif	/* MB */
+
+#define	REGEXP_H_USED_FROM_VI
+
+#include "regexp.h"
+
+/*
+ * Convert the string at src to lower case and store the result,
+ * terminated by a NUL byte, at dst. Returns the length of the result
+ * in bytes, not counting the terminating NUL.
+ *
+ * dst may be the same buffer as src (both callers in this file convert
+ * in place): a converted character is only stored if its encoding is
+ * not longer than the original one, so the output position never
+ * overtakes the input position. Otherwise dst must have room for
+ * strlen(src) + 1 bytes.
+ *
+ * Bytes that do not form a valid multibyte character, and characters
+ * whose lower case form cannot be encoded or would need more bytes
+ * than the original, are copied unchanged.
+ */
+static size_t
+loconv(register char *dst, register const char *src)
+{
+	char	*odst = dst;
+
+#ifdef	MB
+	if (mb_cur_max > 1) {
+		char	mb[MB_LEN_MAX];
+		wchar_t wc;
+		int len, i, nlen;
+
+		while (*src) {
+			if ((*src & 0200) == 0) {
+				/* 7-bit byte: no multibyte decoding needed */
+				*dst++ = tolower(*src);
+				src++;
+			} else if ((len = mbtowc(&wc, src, mb_cur_max)) <= 0) {
+				/* invalid sequence: pass the byte through */
+				*dst++ = *src++;
+			} else {
+				/*
+				 * Encode into a scratch buffer first so that
+				 * nothing is stored at dst unless wctomb()
+				 * succeeded (it returns -1 on failure) and
+				 * the result fits into the space occupied by
+				 * the original character.
+				 */
+				nlen = wctomb(mb, towlower(wc));
+				if (nlen > 0 && nlen <= len) {
+					src += len;
+					for (i = 0; i < nlen; i++)
+						*dst++ = mb[i];
+				} else {
+					/*
+					 * Keep the original: copy its first
+					 * byte here, the remaining bytes are
+					 * passed through by the branch above.
+					 */
+					*dst++ = *src++;
+				}
+			}
+		}
+	} else
+#endif	/* MB */
+	{
+		while (*src) {
+			/* mask to avoid passing a negative char to tolower() */
+			*dst++ = tolower(*src & 0377);
+			src++;
+		}
+	}
+	/*
+	 * The result may be shorter than the input; terminate it so that
+	 * an in-place conversion leaves no stale bytes behind.
+	 */
+	*dst = '\0';
+	return dst - odst;
+}
+
+#undef	compile
+
+#endif	/* !UXRE */
+
 /*
 * Global, substitute and regular expressions.
 * Very similar to ed, with some re extensions and
@ -326,10 +406,10 @@ compsub(int ch)
 		/* fall into ... */
 	case '&':
 	redo:
-		if (re.Expbuf[0] == 0)
+		if (re.Patbuf[0] == 0)
 			error(catgets(catd, 1, 127,
 			"No previous re|No previous regular expression"));
-		if (subre.Expbuf[0] == 0)
+		if (subre.Patbuf[0] == 0)
 			error(catgets(catd, 1, 128,
 	"No previous substitute re|No previous substitute to repeat"));
 		break;
@ -643,9 +723,9 @@ dosub(void)
 				continue;
 			}
 #ifndef	BIT8
-		if (c < 0 && (c &= TRIM) >= '1' && c < nbra + '1') {
+		if (c < 0 && (c &= TRIM) >= '1' && c < re.Nbra + '1') {
 #else
-		if (q && c >= '1' && c < nbra + '1') {
+		if (q && c >= '1' && c < re.Nbra + '1') {
 #endif
 			sp = place(sp, braslist[c - '1'], braelist[c - '1']);
 			if (sp == 0)
@ -777,11 +857,10 @@ snote(register int total, register int lines)
 void
 cerror(char *s)
 {
-	expbuf[0] = '\0';
+	re.Patbuf[0] = '\0';
 	error(s);
 }

-#ifdef	UXRE
 void
 refree(struct regexp *rp)
 {
@ -801,17 +880,18 @@ refree(struct regexp *rp)
 	}
 	if ((r1->Re_used == 0 || rp->Re_ident != r1->Re_ident) &&
 			(r2->Re_used == 0 || rp->Re_ident != r2->Re_ident))
-		regfree(&rp->Re);
+#ifdef	UXRE
+		regfree(&rp->Expbuf);
+#else	/* !UXRE */
+		free(rp->Expbuf);
+#endif	/* !UXRE */
 	rp->Re_used = 0;
 }
-#endif

 struct regexp *
 savere(struct regexp *store)
 {
-#ifdef	UXRE
 	refree(store);
-#endif
 	copy(store, &re, sizeof re);
 	return store;
 }
@ -819,21 +899,18 @@ savere(struct regexp *store)
 struct regexp *
 resre(struct regexp *store)
 {
-#ifdef	UXRE
 	refree(&re);
-#endif
 	copy(&re, store, sizeof re);
 	return store;
 }

-#ifdef	UXRE
 int
 compile(int eof, int oknl)
 {
 	int c, d, i, n;
 	char	mb[MB_LEN_MAX+1];
-	char *p = re.Expbuf, *end = re.Expbuf + sizeof re.Expbuf;
-	int err = 0, nomagic = value(MAGIC) ? 0 : 1, esc;
+	char *p = re.Patbuf, *end = re.Patbuf + sizeof re.Patbuf;
+	int nomagic = value(MAGIC) ? 0 : 1, esc, rcnt = 0;
 	char *rhsp;
 #ifdef	BIT8
 	char *rhsq;
@ -847,13 +924,13 @@ compile(int eof, int oknl)
 		switch (c) {
 		case '/':
 		case '?':
-			if (scanre.Expbuf[0] == 0)
+			if (scanre.Patbuf[0] == 0)
 				error(catgets(catd, 1, 134,
 	"No previous scan re|No previous scanning regular expression"));
 			resre(&scanre);
 			return c;
 		case '&':
-			if (subre.Expbuf[0] == 0)
+			if (subre.Patbuf[0] == 0)
 				error(catgets(catd, 1, 135,
 	"No previous substitute re|No previous substitute regular expression"));
 			resre(&subre);
@ -874,9 +951,9 @@ compile(int eof, int oknl)
 			"No previous re|No previous regular expression"));
 		return eof;
 	}
-	nbra = circfl = 0;
+	re.Nbra = re.Circfl = 0;
 	if (c == '^')
-		circfl++;
+		re.Circfl++;
 	esc = 0;
 	goto havec;
 	/*
@ -963,6 +1040,7 @@ compile(int eof, int oknl)
 #endif
 			}
 		} else if (!esc && c == '[') {
+			rcnt++;
 			/*
 			 * Search for the end of the bracket expression
 			 * since '~' may not be recognized inside.
@ -980,6 +1058,7 @@ compile(int eof, int oknl)
 					if (p >= end)
 						goto complex;
 				}
+#ifdef	UXRE
 				if (d == '[' && (c == ':' || c == '.' ||
 							c == '=')) {
 					d = c;
@ -1001,6 +1080,7 @@ compile(int eof, int oknl)
 					}
 					c = EOF; /* -> reset d and continue */
 				}
+#endif	/* UXRE */
 				d = c;
 			} while (c != ']');
 		} else if (esc && c == '{') {
@ -1040,33 +1120,94 @@ compile(int eof, int oknl)
 complex:		cerror(catgets(catd, 1, 139,
 			"Re too complex|Regular expression too complicated"));
 	}
-	if (p == expbuf)
+	if (p == re.Patbuf)
 		*p++ = '.';	/* approximate historical behavior */
 	*p = '\0';
 	refree(&re);
+#ifdef	UXRE
 	c = REG_ANGLES | REG_BADRANGE;
 #ifndef	NO_BE_BACKSLASH
 	c |= REG_BKTESCAPE;
 #endif	/* !NO_BE_BACKSLASH */
 	if (value(IGNORECASE))
 		c |= REG_ICASE;
-	if ((err = regcomp(&re.Re, re.Expbuf, c)) != 0) {
-		switch (err) {
+	if ((i = regcomp(&re.Expbuf, re.Patbuf, c)) != 0) {
+		switch (i) {
 		case REG_EBRACK:
 		miss:	cerror(catgets(catd, 1, 154, "Missing ]"));
+			/*NOTREACHED*/
+			break;
 		default:
-			regerror(err, &re.Re, &re.Expbuf[1],
-					sizeof re.Expbuf - 1);
-			cerror(&re.Expbuf[1]);
+			regerror(i, &re.Expbuf, &re.Patbuf[1],
+					sizeof re.Patbuf - 1);
+			cerror(&re.Patbuf[1]);
 		}
 	}
+	if ((re.Nbra = re.Expbuf.re_nsub) > NBRA)
+		re.Nbra = NBRA;
+#else	/* !UXRE */
+	if ((re.Expbuf = malloc(n = rcnt*32 + 2*(p-re.Patbuf) + 5)) == NULL)
+		goto complex;
+	if (value(IGNORECASE))
+		loconv(re.Patbuf, re.Patbuf);
+	if (_compile(re.Patbuf, re.Expbuf, &re.Expbuf[n], '\0') == 0) {
+		char	*cp;
+		free(re.Expbuf);
+		switch (regerrno) {
+		case 11:
+			cp = "Range endpoint too large|Range endpoint "
+					"too large in regular expression";
+			break;
+		case 16:
+			cp = "Bad number|Bad number in regular expression";
+			break;
+		case 25:
+			cp = "\"\\digit\" out of range";
+			break;
+		case 36:
+			cp = "Badly formed re|Missing closing delimiter "
+				"for regular expression";
+			break;
+		case 41:
+			cp = "No remembered search string.";
+			break;
+		case 42:
+			cp = "Unmatched \\( or \\)|More \\('s than \\)'s in "
+				"regular expression or vice-versa";
+			break;
+		case 43:
+			cp = "Awash in \\('s!|Too many \\('d subexressions "
+				"in a regular expression";
+			break;
+		case 44:
+			cp = "More than 2 numbers given in \\{~\\}";
+			break;
+		case 45:
+			cp = "} expected after \\";
+			break;
+		case 46:
+			cp = "First number exceeds second in \\{~\\}";
+			break;
+		case 49:
+		miss:	cp = "Missing ]";
+			break;
+		case 67:
+			cp = "Illegal byte sequence.";
+			break;
+		default:
+			cp = "Unknown regexp error code!!";
+		}
+		cerror(cp);
+	}
+	re.Circfl = circf;
+	re.Nbra = nbra;
+#endif	/* !UXRE */
 	re.Re_used = 1;
 	re.Re_ident++;
-	if ((nbra = re.Re.re_nsub) > NBRA)
-		nbra = NBRA;
 	return eof;
 }

+#ifdef	UXRE
 int
 execute(int gf, line *addr)
 {
@ -1076,7 +1217,7 @@ execute(int gf, line *addr)
 	regmatch_t bralist[NBRA + 1];

 	if (gf) {
-		if (circfl)
+		if (re.Circfl)
 			return 0;
 		eflags |= REG_NOTBOL;
 		p = loc2;
@ -1091,7 +1232,7 @@ execute(int gf, line *addr)
 	 * so don't fetch them otherwise (enables use of DFA).
 	 */
 	nsub = (re.Re_ident == subre.Re_ident ? NBRA : 0);
-	switch (regexec(&re.Re, p, nsub + 1, bralist, eflags)) {
+	switch (regexec(&re.Expbuf, p, nsub + 1, bralist, eflags)) {
 	case 0:
 		break;
 	case REG_NOMATCH:
@ -1112,620 +1253,24 @@ execute(int gf, line *addr)
 	return 1;
 }
 #else	/* !UXRE */
-#define	INSCHAR(c)	{ \
-				if ((c) == '\n' || (c) == EOF) \
-					cerror(catgets(catd, 1, 154, \
-						"Missing ]")); \
-				*ep++ = (c); \
-				cclcnt++; \
-				if (ep >= &expbuf[ESIZE]) \
-					goto complex; \
-			}
-
-int
-compile(int eof, int oknl)
-{
-	register int c;
-	register char *ep;
-#ifdef	BIT8
-#ifndef	NO_BE_BACKSLASH
-	bool haddash;
-#endif	/* !NO_BE_BACKSLASH */
-#endif	/* BIT8 */
-	char *lastep = NULL;
-	char bracket[NBRA], *bracketp, *rhsp;
-#ifdef	BIT8
-	char *rhsq;
-#endif
-	int cclcnt;
-	int i, cflg, closed;
-
-	if (isalpha(eof) || isdigit(eof))
-		error(catgets(catd, 1, 133,
-	"Regular expressions cannot be delimited by letters or digits"));
-	ep = expbuf;
-	c = getchar();
-	if (eof == '\\')
-		switch (c) {
-
-		case '/':
-		case '?':
-			if (scanre.Expbuf[0] == 0)
-				error(catgets(catd, 1, 134,
-	"No previous scan re|No previous scanning regular expression"));
-			resre(&scanre);
-			return (c);
-
-		case '&':
-			if (subre.Expbuf[0] == 0)
-				error(catgets(catd, 1, 135,
-	"No previous substitute re|No previous substitute regular expression"));
-			resre(&subre);
-			return (c);
-
-		default:
-			error(catgets(catd, 1, 136,
-	"Badly formed re|Regular expression \\ must be followed by / or ?"));
-		}
-	if (c == eof || c == '\n' || c == EOF) {
-		if (*ep == 0)
-			error(catgets(catd, 1, 137,
-			"No previous re|No previous regular expression"));
-		if (c == '\n' && oknl == 0)
-			error(catgets(catd, 1, 138,
-			"Missing closing delimiter@for regular expression"));
-		if (c != eof)
-			ungetchar(c);
-		return (eof);
-	}
-	bracketp = bracket;
-	nbra = 0;
-	circfl = 0;
-	closed = 0;
-	if (c == '^') {
-		c = getchar();
-		circfl++;
-	}
-	ungetchar(c);
-	for (;;) {
-		if (ep >= &expbuf[ESIZE - 2])
-complex:
-			cerror(catgets(catd, 1, 139,
-			"Re too complex|Regular expression too complicated"));
-		c = getchar();
-		if (c == eof || c == EOF) {
-			if (bracketp != bracket)
-				cerror(catgets(catd, 1, 140,
-		"Unmatched \\(|More \\('s than \\)'s in regular expression"));
-			*ep++ = CEOFC;
-			if (c == EOF)
-				ungetchar(c);
-			return (eof);
-		}
-		if (value(MAGIC)) {
-			if (c != '*' && (c != '\\' || peekchar() != '{') ||
-					ep == expbuf) {
-				lastep = ep;
-			}
-		} else
-			if (c != '\\' || peekchar() != '*' || ep == expbuf) {
-				lastep = ep;
-			}
-		switch (c) {
-
-		case '\\':
-			c = getchar();
-			switch (c) {
-
-			case '(':
-				if (nbra >= NBRA)
-					cerror(catgets(catd, 1, 141,
-"Awash in \\('s!|Too many \\('d subexressions in a regular expression"));
-				*bracketp++ = nbra;
-				*ep++ = CBRA;
-				*ep++ = nbra++;
-				continue;
-
-			case ')':
-				if (bracketp <= bracket)
-					cerror(catgets(catd, 1, 142,
-		"Extra \\)|More \\)'s than \\('s in regular expression"));
-				*ep++ = CKET;
-				*ep++ = *--bracketp;
-				closed++;
-				continue;
-
-			case '<':
-				*ep++ = CBRC;
-				continue;
-
-			case '>':
-				*ep++ = CLET;
-				continue;
-			case '{':
-				if (lastep == (char *)0)
-					goto defchar;
-				*lastep |= RNGE;
-				cflg = 0;
-nlim:
-				c = getchar();
-				i = 0;
-				do {
-					if ('0' <= c && c <= '9')
-						i = 10 * i + c - '0';
-					else
-						cerror(catgets(catd, 1, 143,
-			"Bad number|Bad number in regular expression"));
-				} while ((c = getchar()) != '\\' && c != ',');
-				if (i > 255)
-					cerror(catgets(catd, 1, 144,
-"Range endpoint too large|Range endpoint too large in regular expression"));
-				*ep++ = i;
-				if (c == ',') {
-					if (cflg++)
-						cerror(catgets(catd, 1, 145,
-				"More than 2 numbers given in \\{~\\}"));
-					if ((c = getchar()) == '\\') {
-						*ep++ = 255;
-					} else {
-						ungetchar(c);
-						goto nlim;
-					}
-				}
-				if (getchar() != '}')
-					cerror(catgets(catd, 1, 146,
-						"} expected after \\"));
-				if (!cflg) {
-					*ep++ = i;
-				}
-				else if ((ep[-1] & 0377) < (ep[-2] & 0377))
-					cerror(catgets(catd, 1, 147,
-				"First number exceeds second in \\{~\\}"));
-				continue;
-			default:
-				if (c >= '1' && c <= '9') {
-					if ((c -= '1') >= closed)
-						cerror(catgets(catd, 1, 148,
-						"\"\\digit\" out of range"));
-					*ep++ = CBACK;
-					*ep++ = c;
-					continue;
-				}
-			}
-			if (value(MAGIC) == 0)
-magic:
-			switch (c) {
-
-			case '.':
-				*ep++ = CDOT;
-				continue;
-
-			case '~':
-				rhsp = rhsbuf;
-#ifdef	BIT8
-				rhsq = rhsquo;
-#endif
-				while (*rhsp) {
-#ifndef	BIT8
-					if (*rhsp & QUOTE) {
-						c = *rhsp & TRIM;
-#else
-					if (*rhsq) {
-						c = *rhsp;
-#endif
-						if (c == '&')
-							error(catgets(catd, 1,
-		149, "Replacement pattern contains &@- cannot use in re"));
-						if (c >= '1' && c <= '9')
-							error(catgets(catd, 1,
-		150, "Replacement pattern contains \\d@- cannot use in re"));
-					}
-					if (ep >= &expbuf[ESIZE-2])
-						goto complex;
-					*ep++ = CCHR;
-#ifndef	BIT8
-					*ep++ = *rhsp++ & TRIM;
-#else
-					*ep++ = *rhsp++;
-					rhsq++;
-#endif
-				}
-				continue;
-
-			case '*':
-				if (ep == expbuf)
-					break;
-				if (*lastep == CBRA || *lastep == CKET)
-					cerror(catgets(catd, 1, 151,
-		"Illegal *|Can't * a \\( ... \\) in regular expression"));
-#ifndef	BIT8
-				if (*lastep == CCHR && (lastep[1] & QUOTE))
-					cerror(catgets(catd, 1, 152,
-			"Illegal *|Can't * a \\n in regular expression"));
-#endif
-				*lastep |= STAR;
-				continue;
-
-			case '[':
-				*ep++ = CCL;
-				*ep++ = 0;
-#ifdef	BIT8
-#ifndef	NO_BE_BACKSLASH
-				haddash = 0;
-#endif	/* !NO_BE_BACKSLASH */
-#endif	/* BIT8 */
-				cclcnt = 1;
-				c = getchar();
-				if (c == '^') {
-					c = getchar();
-					ep[-2] = NCCL;
-				}
-#ifndef	NO_BE_BACKSLASH
-				if (c == ']')
-					cerror(catgets(catd, 1, 153,
-"Bad character class|Empty character class '[]' or '[^]' cannot match"));
-				while (c != ']') {
-					if (c == '\\' && any(peekchar(), "]-^\\")) {
-#ifndef	BIT8
-						c = getchar() | QUOTE;
-#else	/* BIT8 */
-						if ((c = getchar()) == '-') {
-							haddash = 1;
-							c = getchar();
-						}
-#endif	/* BIT8 */
-					}
-					INSCHAR(c)
-					c = getchar();
-				}
-#ifdef	BIT8
-				if (haddash)
-					INSCHAR('-')
-#endif	/* BIT8 */
-#else	/* NO_BE_BACKSLASH */
-				/*
-				 * There is no escape character inside a
-				 * bracket expression. Characters lose their
-				 * special meaning by position only.
-				 */
-				do
-					INSCHAR(c)
-				while ((c = getchar()) != ']');
-#endif	/* NO_BE_BACKSLASH */
-				lastep[1] = cclcnt;
-				continue;
-			}
-			if (c == EOF) {
-				ungetchar(EOF);
-				c = '\\';
-				goto defchar;
-			}
-			*ep++ = CCHR;
-			if (c == '\n')
-				cerror(catgets(catd, 1, 155,
-	"No newlines in re's|Can't escape newlines into regular expressions"));
-/*
-			if (c < '1' || c > NBRA + '1') {
-*/
-				*ep++ = c;
-				continue;
-/*
-			}
-			c -= '1';
-			if (c >= nbra)
-				cerror(catgets(catd, 1, 156,
-"Bad \\n|\\n in regular expression with n greater than the number of \\('s"));
-			*ep++ = c | QUOTE;
-			continue;
-*/
-
-		case '\n':
-			if (oknl) {
-				ungetchar(c);
-				*ep++ = CEOFC;
-				return (eof);
-			}
-			cerror(catgets(catd, 1, 157,
-	"Badly formed re|Missing closing delimiter for regular expression"));
-
-		case '$':
-			if (peekchar() == eof || peekchar() == EOF || oknl && peekchar() == '\n') {
-				*ep++ = CDOL;
-				continue;
-			}
-			goto defchar;
-
-		case '.':
-		case '~':
-		case '*':
-		case '[':
-			if (value(MAGIC))
-				goto magic;
-defchar:
-		default:
-			*ep++ = CCHR;
-			*ep++ = c;
-			continue;
-		}
-	}
-}
-
-int
-same(register int a, register int b)
-{
-
-	return (a == b || value(IGNORECASE) &&
-	   ((islower(a) && toupper(a) == b) || (islower(b) && toupper(b) == a)));
-}
-
-int
-ecmp(register char *a, register char *b, register int count)
-{
-	while (count--)
-		if (!same(*a++, *b++))
-			return (0);
-	return (1);
-}
-
-char	*locs;
-
 int
 execute(int gf, line *addr)
 {
-	register char *p1, *p2;
-	register int c;
+	char *p;

 	if (gf) {
-		if (circfl)
-			return (0);
-		locs = p1 = loc2;
+		if (re.Circfl)
+			return 0;
+		p = locs = loc2;
 	} else {
 		if (addr == zero)
-			return (0);
-		p1 = linebuf;
+			return 0;
+		p = linebuf;
 		getline(*addr);
-		locs = 0;
+		if (value(IGNORECASE))
+			loconv(linebuf, linebuf);
 	}
-	p2 = expbuf;
-	if (circfl) {
-		loc1 = p1;
-		return (advance(p1, p2));
-	}
-	/* fast check for first character */
-	if (*p2 == CCHR) {
-		c = p2[1];
-		do {
-			if (c != *p1 && (!value(IGNORECASE) ||
-			   !((islower(c) && toupper(c) == *p1) ||
-			   (islower(*p1&0377) && toupper(*p1&0377) == c))))
-				continue;
-			if (advance(p1, p2)) {
-				loc1 = p1;
-				return (1);
-			}
-		} while (*p1++);
-		return (0);
-	}
-	/* regular algorithm */
-	do {
-		if (advance(p1, p2)) {
-			loc1 = p1;
-			return (1);
-		}
-	} while (*p1++);
-	return (0);
-}
-
-void
-getrnge(register char *str)
-{
-	low = *str++ & 0377;
-	siz = (*str & 0377) == 255 ? 20000 : (*str & 0377) - low;
-}
-
-#define	uletter(c)	(isalpha(c) || c == '_')
-
-int
-advance(register char *lp, register char *ep)
-{
-	register char *curlp;
-	/* char *sp, *sp1; */
-	int c, ct;
-	char *bbeg;
-
-	for (;;) switch (*ep++) {
-
-	case CCHR:
-/* useless
-		if (*ep & QUOTE) {
-			c = *ep++ & TRIM;
-			sp = braslist[c];
-			sp1 = braelist[c];
-			while (sp < sp1) {
-				if (!same(*sp, *lp))
-					return (0);
-				sp++, lp++;
-			}
-			continue;
-		}
-*/
-		if (!same(*ep, *lp))
-			return (0);
-		ep++, lp++;
-		continue;
-
-	case CDOT:
-		if (*lp++)
-			continue;
-		return (0);
-
-	case CDOL:
-		if (*lp == 0)
-			continue;
-		return (0);
-
-	case CEOFC:
-		loc2 = lp;
-		return (1);
-
-	case CCL:
-		if (cclass(ep, *lp++, 1)) {
-			ep += *ep;
-			continue;
-		}
-		return (0);
-
-	case NCCL:
-		if (cclass(ep, *lp++, 0)) {
-			ep += *ep;
-			continue;
-		}
-		return (0);
-
-	case CBRA:
-		braslist[(int)*ep++] = lp;
-		continue;
-
-	case CKET:
-		braelist[(int)*ep++] = lp;
-		continue;
-
-	case CCHR|RNGE:
-		c = *ep++;
-		getrnge(ep);
-		while (low--)
-			if (!same(*lp++, c))
-				return (0);
-		curlp = lp;
-		while (siz--)
-			if (!same(*lp++, c))
-				break;
-		if (siz < 0)
-			lp++;
-		ep += 2;
-		goto star;
-
-	case CDOT|RNGE:
-		getrnge(ep);
-		while (low--)
-			if (*lp++ == '\0')
-				return (0);
-		curlp = lp;
-		while (siz--)
-			if (*lp++ == '\0')
-				break;
-		if (siz < 0)
-			lp++;
-		ep += 2;
-		goto star;
-
-	case CCL|RNGE:
-	case NCCL|RNGE:
-		getrnge(ep + *ep);
-		while (low--) {
-			if (!cclass(ep, *lp++, ep[-1] == (CCL|RNGE)))
-				return (0);
-		}
-		curlp = lp;
-		while (siz--) {
-			if (!cclass(ep, *lp++, ep[-1] == (CCL|RNGE)))
-				break;
-		}
-		if (siz < 0)
-			lp++;
-		ep += *ep + 2;
-		goto star;
-
-	case CBACK:
-		bbeg = braslist[*ep & 0377];
-		ct = braelist[*ep++ & 0377] - bbeg;
-		if (ecmp(bbeg, lp, ct)) {
-			lp += ct;
-			continue;
-		}
-		return (0);
-
-	case CBACK|STAR:
-		bbeg = braslist[*ep & 0377];
-		ct = braelist[*ep++ & 0377] - bbeg;
-		curlp = lp;
-		while (ecmp(bbeg, lp, ct))
-			lp += ct;
-		while (lp >= curlp) {
-			if (advance(lp, ep))
-				return (1);
-			lp -= ct;
-		}
-		return (0);
-
-	case CDOT|STAR:
-		curlp = lp;
-		while (*lp++)
-			continue;
-		goto star;
-
-	case CCHR|STAR:
-		curlp = lp;
-		while (same(*lp, *ep))
-			lp++;
-		lp++;
-		ep++;
-		goto star;
-
-	case CCL|STAR:
-	case NCCL|STAR:
-		curlp = lp;
-		while (cclass(ep, *lp++, ep[-1] == (CCL|STAR)))
-			continue;
-		ep += *ep;
-		goto star;
-star:
-		do {
-			lp--;
-			if (lp == locs)
-				break;
-			if (advance(lp, ep))
-				return (1);
-		} while (lp > curlp);
-		return (0);
-
-	case CBRC:
-		if (lp == linebuf)
-			continue;
-		if ((isdigit(*lp&0377) || uletter(*lp&0377))
-			&& !uletter(lp[-1]&0377) && !isdigit(lp[-1]&0377))
-			continue;
-		return (0);
-
-	case CLET:
-		if (!uletter(*lp&0377) && !isdigit(*lp&0377))
-			continue;
-		return (0);
-
-	default:
-		error(catgets(catd, 1, 158, "Re internal error"));
-	}
-}
-
-int
-cclass(register char *set, register int c, int af)
-{
-	register int n;
-
-	if (c == 0)
-		return (0);
-	if (value(IGNORECASE) && isupper(c))
-		c = tolower(c);
-	n = *set++;
-	while (--n)
-		if (n > 2 && set[1] == '-') {
-			if (c >= (set[0] & TRIM) && c <= (set[2] & TRIM))
-				return (af);
-			set += 3;
-			n -= 2;
-		} else
-			if ((*set++ & TRIM) == c)
-				return (af);
-	return (!af);
+	circf = re.Circfl;
+	return step(p, re.Expbuf);
 }
 #endif	/* !UXRE */
--- a/ex_re.h
+++ b/ex_re.h
@ -72,7 +72,7 @@
 *
 *	from ex_re.h	7.3 (Berkeley) 5/31/85
 *
- *	@(#)ex_re.h	1.18 (gritter) 11/23/04
+ *	@(#)ex_re.h	1.19 (gritter) 2/19/05
 */

 #ifdef	UXRE
@ -89,15 +89,13 @@
 * more and alternation.)
 */
 struct	regexp {
-#ifdef	UXRE
-	char	Expbuf[2*LBSIZE + 1];
-	regex_t	Re;
+	char	Patbuf[2*LBSIZE + 1];
 	long	Re_ident;
 	bool	Re_used;
+#ifdef	UXRE
+	regex_t	Expbuf;
 #else	/* !UXRE */
-	char	Expbuf[ESIZE + 2];
-	int	Low;
-	int	Siz;
+	char	*Expbuf;
 #endif	/* !UXRE */
 	bool	Circfl;
 	short	Nbra;
@ -113,18 +111,8 @@ var struct	regexp re;		/* Last re */
 var struct	regexp scanre;		/* Last scanning re */
 var struct	regexp subre;		/* Last substitute re */

-/*
- * Defining circfl and expbuf like this saves us from having to change
- * old code in the ex_re.c stuff.
- */
-#define	expbuf	re.Expbuf
-#define	circfl	re.Circfl
-#define	nbra	re.Nbra
-#define	low	re.Low
-#define	siz	re.Siz
-
-var	char	*loc1;		/* Where re began to match (in linebuf) */
-var	char	*loc2;		/* First char after re match (") */
+extern	char	*loc1;		/* Where re began to match (in linebuf) */
+extern	char	*loc2;		/* First char after re match (") */

 /*
 * Since the phototypesetter v7-epsilon
@ -136,30 +124,9 @@ extern struct regexp *resre(struct regexp *);
 /*
 * Definitions for substitute
 */
-var char	*braslist[NBRA];	/* Starts of \(\)'ed text in lhs */
-var char	*braelist[NBRA];	/* Ends... */
+extern char	*braslist[NBRA];	/* Starts of \(\)'ed text in lhs */
+extern char	*braelist[NBRA];	/* Ends... */
 var char	rhsbuf[RHSSIZE];	/* Rhs of last substitute */
 #ifdef	BIT8
 var char	rhsquo[RHSSIZE];	/* Quote indicator for rhsbuf */
 #endif
-
-/*
- * Definitions of codes for the compiled re's.
- * The re algorithm is described in a paper
- * by K. Thompson in the CACM about 10 years ago
- * and is the same as in ed.
- */
-#define	STAR	1
-#define	RNGE	0100
-
-#define	CBRA	1
-#define	CDOT	4
-#define	CCL	8
-#define	NCCL	12
-#define	CDOL	16
-#define	CEOFC	17
-#define	CKET	18
-#define	CCHR	20
-#define	CBRC	24
-#define	CLET	25
-#define	CBACK	36
--- a/ex_version.c
+++ b/ex_version.c
@ -70,12 +70,12 @@
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
- * Sccsid @(#)ex_version.c	1.126 (gritter) 2/18/05
+ * Sccsid @(#)ex_version.c	1.127 (gritter) 2/19/05
 */

 #include "ex.h"

-static char *versionstring = "@(#)Version 4.0 (gritter) 2/18/05";
+static char *versionstring = "@(#)Version 4.0 (gritter) 2/19/05";

 void 
 printver(void)