From 2c0c217bb6d0fa7e12692513f0da94e16fab983f Mon Sep 17 00:00:00 2001 From: deepend Date: Thu, 10 Oct 2024 18:04:47 +0000 Subject: [PATCH] files from last release. ex-050325 --- Changes | 11 - Makefile | 18 +- README | 16 +- config.h | 24 +- ex.c | 13 +- ex.h | 16 +- ex.spec | 6 +- ex_addr.c | 2 +- ex_argv.h | 2 +- ex_cmdsub.c | 36 +- ex_get.c | 2 +- ex_io.c | 101 +++- ex_proto.h | 12 +- ex_put.c | 57 +- ex_re.c | 19 +- ex_re.h | 4 +- ex_subr.c | 48 +- ex_tagio.c | 5 +- ex_temp.c | 32 +- ex_temp.h | 8 +- ex_tty.c | 4 +- ex_tty.h | 2 +- ex_tune.h | 10 +- ex_unix.c | 2 +- ex_v.c | 40 +- ex_vadj.c | 29 +- ex_version.c | 46 +- ex_vget.c | 4 +- ex_vis.h | 14 +- ex_vmain.c | 36 +- ex_voper.c | 9 +- ex_vops.c | 9 +- ex_vops2.c | 4 +- ex_vops3.c | 10 +- ex_vput.c | 25 +- exrecover.c | 15 +- libuxre/COPYING.LGPL | 504 ++++++++++++++++++ libuxre/Makefile | 12 + libuxre/NOTES | 14 + libuxre/_collelem.c | 119 +++++ libuxre/_collmult.c | 55 ++ libuxre/bracket.c | 829 +++++++++++++++++++++++++++++ libuxre/colldata.h | 226 ++++++++ libuxre/onefile.c | 38 ++ libuxre/re.h | 228 ++++++++ libuxre/regcomp.c | 77 +++ libuxre/regdfa.c | 877 ++++++++++++++++++++++++++++++ libuxre/regdfa.h | 75 +++ libuxre/regerror.c | 95 ++++ libuxre/regex.h | 153 ++++++ libuxre/regexec.c | 68 +++ libuxre/regfree.c | 42 ++ libuxre/regnfa.c | 1070 +++++++++++++++++++++++++++++++++++++ libuxre/regparse.c | 1091 +++++++++++++++++++++++++++++++++++++ libuxre/stubs.c | 97 ++++ libuxre/wcharm.h | 63 +++ mapmalloc.c | 32 +- regexp.h | 1210 ++++++++++++++++++++++++++++++++++++++++++ 58 files changed, 7236 insertions(+), 430 deletions(-) create mode 100644 libuxre/COPYING.LGPL create mode 100644 libuxre/Makefile create mode 100644 libuxre/NOTES create mode 100644 libuxre/_collelem.c create mode 100644 libuxre/_collmult.c create mode 100644 libuxre/bracket.c create mode 100644 libuxre/colldata.h create mode 100644 libuxre/onefile.c create mode 100644 libuxre/re.h create mode 100644 libuxre/regcomp.c create mode 
100644 libuxre/regdfa.c create mode 100644 libuxre/regdfa.h create mode 100644 libuxre/regerror.c create mode 100644 libuxre/regex.h create mode 100644 libuxre/regexec.c create mode 100644 libuxre/regfree.c create mode 100644 libuxre/regnfa.c create mode 100644 libuxre/regparse.c create mode 100644 libuxre/stubs.c create mode 100644 libuxre/wcharm.h create mode 100644 regexp.h diff --git a/Changes b/Changes index 7c5e228..15c8851 100644 --- a/Changes +++ b/Changes @@ -1,14 +1,3 @@ -Release ... -* The screen buffers for visual mode are now dynamically allocated, so - vi usually does not return to ex mode with "screen too large" when the - terminal is resized on a large monitor anymore. -* ex can now edit files with lines of arbitrary length. vi currently only - shows the beginning of a line that does not fit onto the screen in its - entirety. -* Viewing executables and compressed files is no longer inhibited. -* A bug in the supplied realloc() replacement could result in heap - corruption. (No resulting failures have been observed with ex so far.) - Release 3/25/05 * vi no longer dies with a segmentation fault if a line does not fit on the screen after an insertion. diff --git a/Makefile b/Makefile index a40fbec..b3e01ea 100644 --- a/Makefile +++ b/Makefile @@ -72,7 +72,7 @@ # # from Makefile 7.13.1.3 (2.11BSD GTE) 1996/10/23 # -# @(#)Makefile 1.51 (gritter) 2/25/07 +# @(#)Makefile 1.50 (gritter) 2/20/05 # # @@ -333,22 +333,6 @@ install: all install-man test -d $(DESTDIR)$(PRESERVEDIR) || mkdir -p $(DESTDIR)$(PRESERVEDIR) chmod 1777 $(DESTDIR)$(PRESERVEDIR) -PKGROOT = /var/tmp/heirloom-sh -PKGTEMP = /var/tmp -PKGPROTO = pkgproto - -ex.pkg: all - rm -rf $(PKGROOT) - mkdir -p $(PKGROOT) - $(MAKE) ROOT=$(PKGROOT) install - rm -f $(PKGPROTO) - echo 'i pkginfo' >$(PKGPROTO) - (cd $(PKGROOT) && find . 
-print | pkgproto) | >>$(PKGPROTO) sed 's:^\([df] [^ ]* [^ ]* [^ ]*\) .*:\1 root root:; s:^f\( [^ ]* etc/\):v \1:; s:^f\( [^ ]* var/\):v \1:; s:^\(s [^ ]* [^ ]*=\)\([^/]\):\1./\2:' - rm -rf $(PKGTEMP)/$@ - pkgmk -a `uname -m` -d $(PKGTEMP) -r $(PKGROOT) -f $(PKGPROTO) $@ - pkgtrans -o -s $(PKGTEMP) `pwd`/$@ $@ - rm -rf $(PKGROOT) $(PKGPROTO) $(PKGTEMP)/$@ - ex.o: config.h ex_argv.h ex.h ex_proto.h ex_temp.h ex_tty.h ex_tune.h ex.o: ex_vars.h libterm/libterm.h ex_addr.o: config.h ex.h ex_proto.h ex_re.h ex_tune.h ex_vars.h diff --git a/README b/README index e4cbce0..4d0e83d 100644 --- a/README +++ b/README @@ -23,7 +23,9 @@ How to build First look at the Makefile and change the settings there to match your build environment. Explanations are provided directly in this file. -You can tune the sizes of some internal buffers by editing config.h. +You can tune the sizes of some internal buffers by editing config.h. In +particular, you will have to raise the size of the 'TUBE' constants if +you wish to use really large-sized terminals. Then type 'make' and 'make install'. @@ -49,10 +51,8 @@ IBM AIX 5.1, 4.3 NEC SUPER-UX 10.2 NEC UX/4800 Release11.5 Rev.A Control Data EP/IX 2.2.1AA -FreeBSD 3.1, 4.5, 5.x, 6.1 +FreeBSD 3.1, 4.5, 5.x NetBSD 1.6, 2.0 -DragonFlyBSD 1.3.7-DEVELOPMENT -Mac OS X 10.4.3 Reports about other Unix systems are welcome, whether successful or not (in the latter case add a detailed description). This port of vi is only @@ -123,8 +123,8 @@ HP HP-UX B.11.11 and later FreeBSD 5.3 NetBSD 2.0 -It has been tested on xterm patch #192, rxvt-unicode 4.2, mlterm 2.9.1, -xiterm 0.5, and gnome-terminal 2.10.0. +It has been tested on xterm patch #192, rxvt-unicode 4.2, mlterm 2.9.1, and +xiterm 0.5. Successful operation is known for the following encodings: UTF-8, EUC-JP, EUC-KR, Big5, Big5-HKSCS, GB 2312, GBK. vi does not support locking-shift @@ -139,7 +139,7 @@ the erase key once after entering a multibyte character will result in an incomplete byte sequence. 
-Gunnar Ritter 01/12/07 +Gunnar Ritter 2/20/05 Freiburg i. Br. Germany - + diff --git a/config.h b/config.h index 277d636..23f7779 100644 --- a/config.h +++ b/config.h @@ -70,13 +70,35 @@ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * @(#)config.h 1.13 (gritter) 8/4/05 + * @(#)config.h 1.12 (gritter) 2/19/05 */ /* * Configurable settings for the ex editor. */ +/* + * Maximum screen size in visual mode. + * + * Because the routine "alloca" is not portable, TUBESIZE + * bytes are allocated on the stack each time you go into visual + * and then never freed by the system. Thus if you have no terminals + * which are larger than 24 * 80 you may well want to make TUBESIZE + * smaller. TUBECOLS should stay at 160 at least since this defines + * the maximum length of opening on hardcopies and allows two lines + * of open on terminals like adm3's (glass tty's) where it switches + * to pseudo hardcopy mode when a line gets longer than 80 characters. + */ +#ifndef VMUNIX +#define TUBELINES 70 /* Number of screen lines for visual */ +#define TUBECOLS 160 /* Number of screen columns for visual */ +#define TUBESIZE 6000 /* Maximum screen size for visual */ +#else /* VMUNIX */ +#define TUBELINES 100 +#define TUBECOLS 160 +#define TUBESIZE 16000 +#endif /* VMUNIX */ + /* * Various buffer sizes. */ diff --git a/ex.c b/ex.c index a6b5121..7461788 100644 --- a/ex.c +++ b/ex.c @@ -77,7 +77,7 @@ char *copyright = "@(#) Copyright (c) 1980 Regents of the University of California.\n\ All rights reserved.\n"; -static char sccsid[] = "@(#)ex.c 1.37 (gritter) 8/4/05"; +static char sccsid[] = "@(#)ex.c 1.36 (gritter) 2/13/05"; #endif /* DOSCCS */ #endif /* !lint */ @@ -294,13 +294,6 @@ main(register int ac, register char *av[]) poolsbrk(0); #endif - /* - * Initialize the primary buffers which were originally static. - * NOTE: Most of this must be repeated in ex_recover.c. 
- */ - linebuf = calloc(LBSIZE = BUFSIZ<4096?4096:BUFSIZ, sizeof *linebuf); - genbuf = calloc(MAXBSIZE, sizeof *genbuf); - /* * Immediately grab the tty modes so that we wont * get messed up if an interrupt comes in quickly. @@ -595,9 +588,9 @@ argend: else { globp = 0; if ((cp = getenv("HOME")) != 0 && *cp) { - safecat(safecp(genbuf, cp, MAXBSIZE, + safecat(safecp(genbuf, cp, sizeof genbuf, "$HOME too long"), - "/.exrc", MAXBSIZE, + "/.exrc", sizeof genbuf, "$HOME too long"); if (iownit(genbuf)) source(genbuf, 1); diff --git a/ex.h b/ex.h index 0d5cdb2..51ae267 100644 --- a/ex.h +++ b/ex.h @@ -72,7 +72,7 @@ * * from ex.h 7.7.1.1 (Berkeley) 8/12/86 * - * Sccsid @(#)ex.h 1.57 (gritter) 8/6/05 + * @(#)ex.h 1.53 (gritter) 2/17/05 */ /* @@ -276,8 +276,9 @@ typedef sigjmp_buf JMP_BUF; #define SETJMP(a) sigsetjmp(a, 1) #define LONGJMP(a, b) siglongjmp(a, b) -#undef MAXBSIZE -#define MAXBSIZE (2*LBSIZE) +#ifndef MAXBSIZE +#define MAXBSIZE 8192 /* Same as in 4.2BSD */ +#endif #include "ex_tune.h" #include "ex_vars.h" @@ -386,7 +387,7 @@ var short erfile; /* Error message file unit */ var line *fendcore; /* First address in line pointer space */ var char file[FNSIZE]; /* Working file name */ var bool fixedzero; /* zero file size was fixed (for visual) */ -var char *genbuf; /* Working buffer when manipulating linebuf */ +var char genbuf[MAXBSIZE]; /* Working buffer when manipulating linebuf */ var bool hush; /* Command line option - was given, hush up! 
*/ var char *globp; /* (Untyped) input string to command mode */ var bool holdcm; /* Don't cursor address */ @@ -402,8 +403,7 @@ var bool laste; /* Last command was an "e" (or "rec") */ var char lastmac; /* Last macro called for ** */ var char lasttag[TAGSIZE]; /* Last argument to a tag command */ var char *linebp; /* Used in substituting in \n */ -var char *linebuf; /* The primary line buffer */ -var int LBSIZE; /* Size of linebuf */ +var char linebuf[LBSIZE]; /* The primary line buffer */ var bool listf; /* Command should run in list mode */ var line names['z'-'a'+2]; /* Mark registers a-z,' */ var int notecnt; /* Count for notify (to visual from cmd) */ @@ -456,7 +456,7 @@ var int exitoneof; /* exit command loop on EOF */ #define lastchar() lastc #define outchar(c) (*Outchar)(c) #define pastwh() (ignore(skipwh())) -#define pline(no, max) (*Pline)(no, max) +#define pline(no) (*Pline)(no) #define reset() LONGJMP(resetlab,1) #define resexit(a) copy(resetlab, a, sizeof (JMP_BUF)) #define setexit() SETJMP(resetlab) @@ -526,7 +526,7 @@ var line *undadot; /* If we saved all lines, dot reverts here */ #define UNDPUT 4 extern int (*Outchar)(int); -extern int (*Pline)(int, int); +extern void (*Pline)(int); extern int (*Putchar)(int); #define NOSTR (char *) 0 diff --git a/ex.spec b/ex.spec index 2a3184a..8879956 100644 --- a/ex.spec +++ b/ex.spec @@ -1,14 +1,14 @@ # -# Sccsid @(#)ex.spec 1.8 (gritter) 7/12/05 +# Sccsid @(#)ex.spec 1.7 (gritter) 1/22/05 # Summary: A port of the traditional ex/vi editors Name: ex -Version: 040420 +Version: 050325 Release: 1 License: BSD Source: %{name}-%{version}.tar.bz2 Group: System Environment/Base -Vendor: Gunnar Ritter +Vendor: Gunnar Ritter URL: BuildRoot: %{_tmppath}/%{name}-root diff --git a/ex_addr.c b/ex_addr.c index 8d92c64..ffd8db8 100644 --- a/ex_addr.c +++ b/ex_addr.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_addr.c 1.11 (gritter) 8/4/05"; +static char sccsid[] = "@(#)ex_addr.c 1.10 (gritter) 
2/17/05"; #endif #endif /* not lint */ diff --git a/ex_argv.h b/ex_argv.h index 2605ded..04a64ad 100644 --- a/ex_argv.h +++ b/ex_argv.h @@ -72,7 +72,7 @@ * * from ex_argv.h 7.3 (Berkeley) 5/31/85 * - * Sccsid @(#)ex_argv.h 1.9 (gritter) 8/4/05 + * @(#)ex_argv.h 1.8 (gritter) 11/23/04 */ /* diff --git a/ex_cmdsub.c b/ex_cmdsub.c index 6271626..1f798d7 100644 --- a/ex_cmdsub.c +++ b/ex_cmdsub.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_cmdsub.c 1.32 (gritter) 8/6/05"; +static char sccsid[] = "@(#)ex_cmdsub.c 1.29 (gritter) 2/17/05"; #endif #endif @@ -281,7 +281,7 @@ void join(int c) { register line *a1; - char *cp, *cp1; + register char *cp, *cp1; cp = genbuf; *cp = 0; @@ -301,9 +301,8 @@ join(int c) } while (*cp++ = *cp1++) if (cp > &genbuf[LBSIZE-2]) - grow( - "Line overflow|Result line of join would be too long", - &cp1, NULL, &cp, NULL); + error(catgets(catd, 1, 40, + "Line overflow|Result line of join would be too long")); cp--; } strcLIN(genbuf); @@ -478,7 +477,7 @@ pragged(int kill) getline(dol[1]); if (kill) strcLIN(pkill[0]); - safecp(gp, linebuf, MAXBSIZE - (gp - genbuf), "Line too long"); + safecp(gp, linebuf, sizeof genbuf - (gp - genbuf), "Line too long"); strcLIN(genbuf); putmark(dol+1); undkind = UNDCHANGE; @@ -496,7 +495,7 @@ void shift(int c, int cnt) { register line *addr; - char *cp = NULL; + register char *cp = NULL; char *dp; register int i; @@ -535,9 +534,8 @@ shift(int c, int cnt) #endif } if (cp + strlen(dp = vpastwh(linebuf)) >= &genbuf[LBSIZE - 2]) - grow( - "Line too long|Result line after shift would be too long", - &dp, NULL, &cp, NULL); + error(catgets(catd, 1, 45, + "Line too long|Result line after shift would be too long")); CP(cp, dp); strcLIN(genbuf); putmark(addr); @@ -564,9 +562,9 @@ tagfind(bool quick) struct stat sbuf; char *savefirstpat = NULL; int ofailed; - char *ft_iofbuf = NULL; #ifdef FASTTAG int ft_iof; + char ft_iofbuf[MAXBSIZE]; off_t mid; /* assumed byte offset */ off_t top, bot; /* 
length of tag file */ #endif @@ -602,9 +600,6 @@ badtag: */ safecp(tagfbuf, svalue(TAGS), sizeof tagfbuf, "Tag too long"); fne = tagfbuf - 1; -#ifdef FASTTAG - ft_iofbuf = smalloc(MAXBSIZE); -#endif while (fne) { fn = ++fne; while (*fne && *fne != ' ') @@ -651,10 +646,10 @@ badtag: tseek(ft_iof, mid); if (mid > 0) /* to get first tag in file to work */ /* scan to next \n */ - if(tgets(linebuf, LBSIZE, ft_iof)==0) + if(tgets(linebuf, sizeof linebuf, ft_iof)==0) goto goleft; /* get the line itself */ - if(tgets(linebuf, LBSIZE, ft_iof)==0) + if(tgets(linebuf, sizeof linebuf, ft_iof)==0) goto goleft; #ifdef TDEBUG printf("tag: %o %o %o %s\n", bot, mid, top, linebuf); @@ -692,7 +687,6 @@ goleft: cp++; if (!*cp) badtags: - free(ft_iofbuf); serror(catgets(catd, 1, 48, "%s: Bad tags file entry"), lasttag); lp = filebuf; @@ -726,11 +720,9 @@ badtags: /* Different file. Do autowrite & get it. */ if (!quick) { ckaw(); - if (chng && dol > zero) { - free(ft_iofbuf); + if (chng && dol > zero) error(catgets(catd, 1, 49, "No write@since last change (:tag! overrides)")); - } } oglobp = globp; strcpy(cmdbuf2, "e! 
"); @@ -775,7 +767,6 @@ badtags: } else tflag = 0; } - free(ft_iofbuf); return; } /* end of "for each tag in file" */ @@ -788,7 +779,6 @@ badtags: close(io); #endif } /* end of "for each file in path" */ - free(ft_iofbuf); if (tfcount <= 0) error(catgets(catd, 1, 50, "No tags file")); else @@ -984,7 +974,7 @@ plines(line *adr1, register line *adr2, bool movedot) pofix(); for (addr = adr1; addr <= adr2; addr++) { getline(*addr); - pline(lineno(addr), -1); + pline(lineno(addr)); if (inopen) { putchar('\n' | QUOTE); } diff --git a/ex_get.c b/ex_get.c index 7bca559..f2325b9 100644 --- a/ex_get.c +++ b/ex_get.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_get.c 1.18 (gritter) 8/4/05"; +static char sccsid[] = "@(#)ex_get.c 1.17 (gritter) 2/17/05"; #endif #endif diff --git a/ex_io.c b/ex_io.c index cd456cb..8769a9d 100644 --- a/ex_io.c +++ b/ex_io.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_io.c 1.42 (gritter) 8/4/05"; +static char sccsid[] = "@(#)ex_io.c 1.40 (gritter) 2/17/05"; #endif #endif @@ -407,7 +407,9 @@ samei(struct stat *sp, char *cp) void rop(int c) { + register int i; struct stat stbuf; + char magic[4]; static int ovro; /* old value(READONLY) */ static int denied; /* 1 if READONLY was set due to file permissions */ @@ -457,6 +459,95 @@ rop(int c) case S_IFIFO: error(catgets(catd, 1, 96, " Named pipe")); #endif + + case S_IFREG: + /* + * The magics are checked byte-wise now to avoid + * endianness problems. Some quite old types + * were omitted. + * + * Feel free too add more magics here, but do not + * make this a copy of the `file' program. 
+ * + * GR + */ + i = read(io, magic, sizeof(magic)); + lseek(io, (off_t) 0, SEEK_SET); + if (i != sizeof(magic)) + break; + switch (magic[0]&0377) { + + case 01: /* big endian a.out */ + if (magic[1] != 05 && magic[1] != 07 + && magic[1] != 010 && magic[1] != 011 + && magic[1] != 013 && magic[1] != 030 + && magic[1] != 031) + break; + goto is_exec; + case 0314: /* Linux/ia32 QMAGIC */ + if (magic[1] != 0 || magic[2] != 0144) + break; + goto is_exec; + case 05: /* data overlay on exec */ + case 07: /* unshared */ + case 010: /* shared text */ + case 011: /* separate I/D */ + case 013: /* VM/Unix demand paged */ + case 030: /* PDP-11 Overlay shared */ + case 031: /* PDP-11 Overlay sep I/D */ + if (magic[1] == 01) +is_exec: + error(catgets(catd, 1, 97, " Executable")); + break; + + case 037: + switch (magic[1]&0377) { + case 036: /* pack */ + case 037: /* compact */ + case 0235: /* compress */ + case 0213: /* gzip */ + /* + * We omit bzip2 here since it has + * an ASCII header. + */ + error(catgets(catd, 1, 98, " Compressed Data")); + } + break; + + case 0177: + if (magic[1] == 'E' && magic[2] == 'L' + && magic[3] == 'F') + error(catgets(catd, 1, 99, " ELF object")); + break; + + default: + break; + } +#ifdef notdef + /* + * We do not forbid the editing of portable archives + * because it is reasonable to edit them, especially + * if they are archives of text files. This is + * especially useful if you archive source files together + * and copy them to another system with ~%take, since + * the files sometimes show up munged and must be fixed. 
+ */ + case 0177545: + case 0177555: + error(catgets(catd, 1, 100, " Archive")); + + default: +#ifdef mbb + /* C/70 has a 10 bit byte */ + if (magic & 03401600) +#else + /* Everybody else has an 8 bit byte */ + if (magic & 0100200) +#endif + error(catgets(catd, 1, 101, " Non-ascii file")); + break; + } +#endif /* notdef */ } if (c != 'r') { if (value(READONLY) && denied) { @@ -748,7 +839,7 @@ int getfile(void) { register short c; - char *lp, *fp; + register char *lp, *fp; lp = linebuf; fp = nextip; @@ -767,8 +858,10 @@ getfile(void) fp = genbuf; cntch += ninbuf+1; } - if (lp >= &linebuf[LBSIZE]) - grow(" Line too long", &lp, NULL, &fp, &nextip); + if (lp >= &linebuf[LBSIZE]) { + synced(); + error(catgets(catd, 1, 118, " Line too long")); + } c = *fp++; if (c == 0) { cntnull++; diff --git a/ex_proto.h b/ex_proto.h index 81e0190..6af8c18 100644 --- a/ex_proto.h +++ b/ex_proto.h @@ -70,8 +70,7 @@ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * Sccsid @(#)ex_proto.h 1.33 (gritter) 8/6/05 + * @(#)ex_proto.h 1.28 (gritter) 2/19/05 */ /* @@ -192,12 +191,12 @@ extern int widthok(int c); extern int GETWC(char *); /* ex_put.c */ extern int (*setlist(int))(int); -extern int (*setnumb(int))(int, int); +extern void (*setnumb(int))(int); extern int listchar(int); extern int normchar(register int); extern void slobber(int); -extern int numbline(int, int); -extern int normline(int, int); +extern void numbline(int); +extern void normline(int); extern int putchar(int); extern int termchar(int); extern void flush2(void); @@ -325,8 +324,6 @@ extern void onemt(int); extern char *movestr(char *, const char *); extern char *safecp(char *, const char *, size_t, char *, ...); extern char *safecat(char *, const char *, size_t, char *, ...); -extern void grow(char *, char **, char **, char **, char **); -extern void *smalloc(size_t); /* ex_tagio.c */ extern int topen(char *, char *); extern int tseek(int, off_t); @@ -510,7 +507,6 @@ extern void vswitch(int); extern int wskipleft(char *, char *); extern int wskipright(char *, char *); extern int wsamechar(char *, int); -extern int xwcwidth(wint_t); #endif /* MB */ /* ex_vput.c */ extern void vclear(void); diff --git a/ex_put.c b/ex_put.c index 0b77955..7f0e0d0 100644 --- a/ex_put.c +++ b/ex_put.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_put.c 1.35 (gritter) 12/25/06"; +static char sccsid[] = "@(#)ex_put.c 1.32 (gritter) 2/17/05"; #endif #endif @@ -101,7 +101,7 @@ static char sccsid[] = "@(#)ex_put.c 1.35 (gritter) 12/25/06"; */ int (*Outchar)(int) = termchar; int (*Putchar)(int) = normchar; -int (*Pline)(int, int) = normline; +void (*Pline)(int) = normline; int (* setlist(int t))(int) @@ -114,10 +114,10 @@ setlist(int t))(int) return (P); } -int (* -setnumb(int t))(int, int) +void (* +setnumb(int t))(int) { - register int (*P)(int, int); + register void (*P)(int); numberf = t; P = Pline; @@ -272,68 +272,41 @@ slobber(int c) /* * Print 
a line with a number. */ -int -numbline(int i, int max) +void +numbline(int i) { if (shudclob) slobber(' '); - max -= printf("%6d ", i); - return normline(0, max); + printf("%6d ", i); + normline(0); } /* * Normal line output, no numbering. */ -int -normline(int unused, int max) +/*ARGSUSED*/ +void +normline(int unused) { - extern short vcntcol, lastsc; - short ovc = -1; register char *cp; - int (*OO)(int); int c, n; - int ret = 0; - if (max > 0) - vcntcol = 0; if (shudclob) slobber(linebuf[0]); /* pdp-11 doprnt is not reentrant so can't use "printf" here in case we are tracing */ cp = linebuf; vcolbp = cp; - while (*cp && max) { + while (*cp) { vcolbp = cp; nextc(c, cp, n); cp += n; - if (max > 0) { - if (Outchar != qcount) { - OO = Outchar; - Outchar = qcount; - putchar(c); - Outchar = OO; - } else - putchar(c); - if ((vcntcol-1) % WCOLS == 0 && lastsc > 1) - vcntcol++; - if (vcntcol >= max) { - putchar('@'); - vcntcol = ovc + 1; - lastsc = 1; - ret = 1; - break; - } - ovc = vcntcol; - if (Outchar != qcount) - putchar(c); - } else - putchar(c); + putchar(c); } if (!inopen) { putchar('\n' | QUOTE); } - return ret; } /* @@ -1054,7 +1027,7 @@ setoutt(void) void vlprintf(char *cp, va_list ap) { - register int (*P)(int); + register int (*P)(); P = setlist(1); vprintf(cp, ap); diff --git a/ex_re.c b/ex_re.c index fb00c38..7f903eb 100644 --- a/ex_re.c +++ b/ex_re.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_re.c 1.60 (gritter) 8/6/05"; +static char sccsid[] = "@(#)ex_re.c 1.56 (gritter) 3/25/05"; #endif #endif @@ -359,7 +359,7 @@ substitute(int c) * but we don't want to break other, reasonable cases. */ while (*loc2) { - if (++hopcount > LBSIZE) + if (++hopcount > sizeof linebuf) error(catgets(catd, 1, 124, "substitution loop")); if (dosubcon(1, addr) == 0) @@ -416,10 +416,10 @@ compsub(int ch) /* fall into ... 
*/ case '&': redo: - if (re.Patbuf == NULL || re.Patbuf[0] == 0) + if (re.Patbuf[0] == 0) error(catgets(catd, 1, 127, "No previous re|No previous regular expression")); - if (subre.Patbuf == NULL || subre.Patbuf[0] == 0) + if (subre.Patbuf[0] == 0) error(catgets(catd, 1, 128, "No previous substitute re|No previous substitute to repeat")); break; @@ -606,7 +606,7 @@ confirmed(line *a) if (cflag == 0) return (1); pofix(); - pline(lineno(a), -1); + pline(lineno(a)); if (inopen) putchar('\n' | QUOTE); c = column(loc1 - 1); @@ -867,8 +867,7 @@ snote(register int total, register int lines) void cerror(char *s) { - if (re.Patbuf != NULL) - re.Patbuf[0] = '\0'; + re.Patbuf[0] = '\0'; error(s); } @@ -1021,17 +1020,13 @@ compile(int eof, int oknl) { int c, d, i, n = 0; char mb[MB_LEN_MAX+1]; - char *p, *end; + char *p = re.Patbuf, *end = re.Patbuf + sizeof re.Patbuf; int nomagic = value(MAGIC) ? 0 : 1, esc, rcnt = 0; char *rhsp; #ifdef BIT8 char *rhsq; #endif - free(re.Patbuf); - re.Patbuf = smalloc(2*LBSIZE + 1); - p = re.Patbuf; - end = &re.Patbuf[2*LBSIZE + 1]; if (isalpha(eof) || isdigit(eof)) error(catgets(catd, 1, 133, "Regular expressions cannot be delimited by letters or digits")); diff --git a/ex_re.h b/ex_re.h index 7e4eb21..2966ef0 100644 --- a/ex_re.h +++ b/ex_re.h @@ -72,7 +72,7 @@ * * from ex_re.h 7.3 (Berkeley) 5/31/85 * - * Sccsid @(#)ex_re.h 1.24 (gritter) 8/4/05 + * @(#)ex_re.h 1.22 (gritter) 2/19/05 */ /* @@ -85,7 +85,7 @@ * more and alternation.) 
*/ struct regexp { - char *Patbuf; + char Patbuf[2*LBSIZE + 1]; long Re_ident; void *Expbuf; bool Circfl; diff --git a/ex_subr.c b/ex_subr.c index 663a78e..6f884aa 100644 --- a/ex_subr.c +++ b/ex_subr.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_subr.c 1.41 (gritter) 12/25/06"; +static char sccsid[] = "@(#)ex_subr.c 1.37 (gritter) 2/15/05"; #endif #endif @@ -84,7 +84,7 @@ static char sccsid[] = "@(#)ex_subr.c 1.41 (gritter) 12/25/06"; #include "ex_tty.h" #include "ex_vis.h" -short lastsc; +static short lastsc; /* * Random routines, in alphabetical order. @@ -631,14 +631,14 @@ plural(long i) : catgets(catd, 1, 179, "s")); } -short vcntcol; +static short vcntcol; int qcolumn(register char *lim, register char *gp) { register int x = 0, n = 1; int c, i; - int (*OO)(int); + int (*OO)(); OO = Outchar; Outchar = qcount; @@ -651,7 +651,7 @@ qcolumn(register char *lim, register char *gp) n = skipright(linebuf, lim); x = lim[n], lim[n] = 0; } - pline(0, inopen ? WLINES*WCOLS : -1); + pline(0); if (lim != NULL) lim[n] = x; if (gp) @@ -851,7 +851,7 @@ char * vfindcol(int i) { register char *cp; - register int (*OO)(int) = Outchar; + register int (*OO)() = Outchar; int c, n = 0; Outchar = qcount; @@ -1151,39 +1151,3 @@ safecat(char *s1, const char *s2, size_t max, char *msg, ...) /*NOTREACHED*/ return NULL; } - -/* - * Grow the line and generic buffers. 
- */ -void -grow(char *msg, char **tolb0, char **tolb1, char **togb0, char **togb1) -{ - char *nlb, *ngb = NULL; - - if ((nlb = realloc(linebuf, LBSIZE + 4096)) == NULL || - (ngb = realloc(genbuf, 2 * (LBSIZE + 4096))) == NULL) { - synced(); - error(msg); - } - if (tolb0) - *tolb0 += nlb - linebuf; - if (tolb1) - *tolb1 += nlb - linebuf; - if (togb0) - *togb0 += ngb - genbuf; - if (togb1) - *togb1 += ngb - genbuf; - linebuf = nlb; - genbuf = ngb; - LBSIZE += 4096; -} - -void * -smalloc(size_t size) -{ - void *vp; - - if ((vp = malloc(size)) == NULL) - error("no space"); - return vp; -} diff --git a/ex_tagio.c b/ex_tagio.c index 73141a3..f7570f5 100644 --- a/ex_tagio.c +++ b/ex_tagio.c @@ -81,7 +81,7 @@ #ifdef FASTTAG #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_tagio.c 1.12 (gritter) 8/4/05"; +static char sccsid[] = "@(#)ex_tagio.c 1.11 (gritter) 11/27/04"; #endif #endif @@ -92,7 +92,7 @@ static char sccsid[] = "@(#)ex_tagio.c 1.12 (gritter) 8/4/05"; static long offset = -1; static long block = -1; static int bcnt = 0; -static int b_size; +static int b_size = MAXBSIZE; static char *ibuf; int @@ -101,7 +101,6 @@ topen(char *file, char *buf) int fd; struct stat statb; - b_size = MAXBSIZE; offset = -1; block = -1; if ((fd = open(file, O_RDONLY, 0)) < 0) diff --git a/ex_temp.c b/ex_temp.c index da908b3..a1127f9 100644 --- a/ex_temp.c +++ b/ex_temp.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_temp.c 1.27 (gritter) 12/25/06"; +static char sccsid[] = "@(#)ex_temp.c 1.24 (gritter) 11/24/04"; #endif #endif @@ -236,7 +236,7 @@ putline(void) } } tl = tline; - tline += (((lp - linebuf) + BNDRY - 1) >> SHFT) & TLNMSK; + tline += (((lp - linebuf) + BNDRY - 1) >> SHFT) & 077776; return (tl); } @@ -265,29 +265,25 @@ getblock(line atl, int iof) if (iof == READ) { if (hitin2 == 0) { if (ichang2) { - blkio(iblock2, ibuff2, - (ssize_t(*)(int, void *, size_t))write); + blkio(iblock2, ibuff2, (ssize_t(*)())write); } ichang2 = 0; iblock2 = 
bno; - blkio(bno, ibuff2, - (ssize_t(*)(int, void *, size_t))read); + blkio(bno, ibuff2, (ssize_t(*)())read); hitin2 = 1; return (ibuff2 + off); } hitin2 = 0; if (ichanged) { - blkio(iblock, ibuff, - (ssize_t(*)(int, void *, size_t))write); + blkio(iblock, ibuff, (ssize_t(*)())write); } ichanged = 0; iblock = bno; - blkio(bno, ibuff, (ssize_t(*)(int, void *, size_t))read); + blkio(bno, ibuff, (ssize_t(*)())read); return (ibuff + off); } if (oblock >= 0) { - blkio(oblock, obuff, - (ssize_t(*)(int, void *, size_t))write); + blkio(oblock, obuff, (ssize_t(*)())write); } oblock = bno; return (obuff + off); @@ -305,7 +301,7 @@ blkio(bloc b, char *buf, ssize_t (*iofcn)(int, void *, size_t)) #ifdef INCORB if (b < INCORB) { - if (iofcn == (ssize_t(*)(int, void *, size_t))read) { + if (iofcn == (ssize_t(*)())read) { copy(buf, pagrnd(incorb[b+1]), (size_t) BUFSIZ); return; } @@ -362,13 +358,13 @@ synctmp(void) if (dol == zero) return; if (ichanged) - blkio(iblock, ibuff, (ssize_t(*)(int, void *, size_t))write); + blkio(iblock, ibuff, (ssize_t(*)())write); ichanged = 0; if (ichang2) - blkio(iblock2, ibuff2, (ssize_t(*)(int, void *, size_t))write); + blkio(iblock2, ibuff2, (ssize_t(*)())write); ichang2 = 0; if (oblock != -1) - blkio(oblock, obuff, (ssize_t(*)(int, void *, size_t))write); + blkio(oblock, obuff, (ssize_t(*)())write); time(&H.Time); uid = getuid(); *zero = (line) H.Time; @@ -570,7 +566,7 @@ shread(void) return (0); } -int getREG(void); +int getREG(); void putreg(int c) @@ -661,9 +657,8 @@ YANKreg(register int c) { register line *addr; register struct strreg *sp; - char *savelb; + char savelb[LBSIZE]; - savelb = smalloc(LBSIZE); if (isdigit(c)) kshift(); if (islower(c)) @@ -693,7 +688,6 @@ YANKreg(register int c) rbflush(); killed(); CP(linebuf,savelb); - free(savelb); } void diff --git a/ex_temp.h b/ex_temp.h index 0691cde..8bbb1d1 100644 --- a/ex_temp.h +++ b/ex_temp.h @@ -72,7 +72,7 @@ * * from ex_temp.h 7.4 (Berkeley) 5/31/85 * - * Sccsid @(#)ex_temp.h 1.10 
(gritter) 8/4/05 + * @(#)ex_temp.h 1.8 (gritter) 1/26/02 */ /* @@ -111,8 +111,7 @@ #define OFFBTS 7 /* 6 */ #define OFFMSK 0177 /* 077 */ #define SHFT 2 /* 3 */ -#define TLNMSK 077776 -#else /* VMUNIX */ +#else #ifdef LARGEF #define BLKMSK 017777777777 #else @@ -129,8 +128,7 @@ #define OFFBTS 10 #define OFFMSK 01777 #define SHFT 0 -#define TLNMSK 017777777776 -#endif /* VMUNIX */ +#endif /* * The editor uses three buffers into the temporary file (ed uses two diff --git a/ex_tty.c b/ex_tty.c index 7d3a7ed..05702b6 100644 --- a/ex_tty.c +++ b/ex_tty.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_tty.c 1.30 (gritter) 8/4/05"; +static char sccsid[] = "@(#)ex_tty.c 1.29 (gritter) 2/17/05"; #endif #endif @@ -300,6 +300,8 @@ setsize(void) i = TLINES; if (TLINES <= 5) TLINES = 24; + if (TLINES > TUBELINES) + TLINES = TUBELINES; l = TLINES; if (ospeed < B1200) l = 9; /* including the message line at the bottom */ diff --git a/ex_tty.h b/ex_tty.h index 79fcf15..66f0573 100644 --- a/ex_tty.h +++ b/ex_tty.h @@ -72,7 +72,7 @@ * * from ex_tty.h 7.5.1 (2.11BSD GTE) 12/9/94 * - * Sccsid @(#)ex_tty.h 1.14 (gritter) 8/4/05 + * @(#)ex_tty.h 1.13 (gritter) 12/1/04 */ #include "libterm/libterm.h" diff --git a/ex_tune.h b/ex_tune.h index 6ef8efb..2026cea 100644 --- a/ex_tune.h +++ b/ex_tune.h @@ -72,7 +72,7 @@ * * from ex_tune.h 7.8.1 (2.11BSD) 1996/10/23 * - * Sccsid @(#)ex_tune.h 1.14 (gritter) 8/4/05 + * @(#)ex_tune.h 1.12 (gritter) 12/1/04 */ /* @@ -111,7 +111,8 @@ /* * Maximums * - * Most definitions are quite generous. + * The definition of LBSIZE should be the same as BUFSIZ (512 usually). + * Most other definitions are quite generous. 
*/ /* FNSIZE is also defined in expreserve.c */ #ifdef _POSIX_PATH_MAX @@ -120,17 +121,20 @@ #define FNSIZE 128 /* File name size */ #endif #ifdef VMUNIX +#define LBSIZE BUFSIZ /* Line buffer size */ #ifndef ESIZE /* see config.h */ #define ESIZE 512 /* Regular expression buffer size */ #endif #define CRSIZE BUFSIZ /* Crypt buffer size */ #else /* !VMUNIX */ #ifdef u370 +#define LBSIZE 4096 #ifndef ESIZE /* see config.h */ #define ESIZE 512 #endif #define CRSIZE 4096 #else +#define LBSIZE 512 /* Line length */ #ifndef ESIZE /* see config.h */ #define ESIZE 128 /* Size of compiled re */ #endif @@ -182,7 +186,7 @@ #undef NCARGS #ifndef VMUNIX #define NARGS 100 /* Maximum number of names in "next" */ -#define NCARGS 512 /* Maximum arglist chars in "next" */ +#define NCARGS LBSIZE /* Maximum arglist chars in "next" */ #else #define NCARGS 5120 #define NARGS (NCARGS/6) diff --git a/ex_unix.c b/ex_unix.c index 4ad17f9..4be0e67 100644 --- a/ex_unix.c +++ b/ex_unix.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_unix.c 1.17 (gritter) 8/4/05"; +static char sccsid[] = "@(#)ex_unix.c 1.16 (gritter) 11/23/04"; #endif #endif diff --git a/ex_v.c b/ex_v.c index 969336c..fe984ee 100644 --- a/ex_v.c +++ b/ex_v.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_v.c 1.19 (gritter) 8/4/05"; +static char sccsid[] = "@(#)ex_v.c 1.17 (gritter) 11/27/04"; #endif #endif @@ -126,41 +126,20 @@ static char sccsid[] = "@(#)ex_v.c 1.19 (gritter) 8/4/05"; */ JMP_BUF venv; -static cell *atube; - -/* - * Determine and set the size for visual mode buffers. - */ -static void -tubesizes(void) -{ - TUBELINES = TLINES; - /* - * TUBECOLS should stay at 160 at least since this defines the - * maximum length of opening on hardcopies and allows two lines - * of open on terminals like adm3's (glass tty's) where it - * switches to pseudo hardcopy mode when a line gets longer - * than 80 characters. - */ - TUBECOLS = TCOLUMNS < 160 ? 
160 : TCOLUMNS; - TUBESIZE = TLINES * TCOLUMNS; - free(vlinfo); - free(vtube); - free(atube); - vlinfo = malloc((TUBELINES+2) * sizeof *vlinfo); - vtube = malloc(TUBELINES * sizeof *vtube); - atube = malloc((TUBESIZE + LBSIZE) * sizeof *atube); - if (vlinfo == NULL || vtube == NULL || atube == NULL) - error("Screen too large"); -} /* * Enter open mode */ +#ifdef u370 +cell atube[TUBESIZE+LBSIZE]; +#endif void oop(void) { register char *ic; +#ifndef u370 + cell atube[TUBESIZE + LBSIZE]; +#endif struct termios f; /* mjm: was register */ int resize; @@ -171,7 +150,6 @@ oop(void) inopen = 0; addr1 = addr2 = dot; } - tubesizes(); #ifdef SIGWINCH signal(SIGWINCH, onwinch); #endif @@ -280,6 +258,9 @@ void vop(void) { register int c; +#ifndef u370 + cell atube[TUBESIZE + LBSIZE]; +#endif struct termios f; /* mjm: was register */ int resize; @@ -318,7 +299,6 @@ toopen: inopen = 0; addr1 = addr2 = dot; } - tubesizes(); #ifdef SIGWINCH signal(SIGWINCH, onwinch); #endif diff --git a/ex_vadj.c b/ex_vadj.c index c579ee7..0bb62c7 100644 --- a/ex_vadj.c +++ b/ex_vadj.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_vadj.c 1.16 (gritter) 8/6/05"; +static char sccsid[] = "@(#)ex_vadj.c 1.11 (gritter) 3/4/05"; #endif #endif @@ -192,8 +192,7 @@ vreopen(int p, int lineno, int l) * necessary to determine which way to go. 
*/ vigoto(p, 0); - if (pline(lineno, WCOLS*WLINES) == 1) - vp->vflags |= VLONG; + pline(lineno); /* * When we are typing part of a line for hardcopy open, don't @@ -419,14 +418,14 @@ vopenup(int cnt, int could, int l) void vadjAL(int p, int cnt) { - cell **tlines = smalloc(TUBELINES * sizeof *tlines); + cell *tlines[TUBELINES]; register int from, to; #ifdef ADEBUG if (trace) tfixnl(), fprintf(trace, "vadjal(%d, %d)\n", p, cnt); #endif - copy(tlines, vtube, TUBELINES * sizeof *tlines); /*SASSIGN*/ + copy(tlines, vtube, sizeof vtube); /*SASSIGN*/ for (from = p, to = p + cnt; to <= WECHO; from++, to++) vtube[to] = tlines[from]; for (to = p; from <= WECHO; from++, to++) { @@ -438,7 +437,6 @@ vadjAL(int p, int cnt) * necessarily consistent with the rest of the display. */ vclrech(0); - free(tlines); } /* @@ -511,7 +509,7 @@ void vscroll(register int cnt) { register int from, to; - cell **tlines; + cell *tlines[TUBELINES]; #ifdef ADEBUG if (trace) @@ -521,8 +519,7 @@ vscroll(register int cnt) error(catgets(catd, 1, 219, "Internal error: vscroll")); if (cnt == 0) return; - tlines = smalloc(TUBELINES * sizeof *tlines); - copy(tlines, vtube, TUBELINES * sizeof *tlines); + copy(tlines, vtube, sizeof vtube); for (to = ZERO, from = ZERO + cnt; to <= WECHO - cnt; to++, from++) vtube[to] = tlines[from]; for (from = ZERO; to <= WECHO; to++, from++) { @@ -531,7 +528,6 @@ vscroll(register int cnt) } for (from = 0; from <= vcnt; from++) LINE(from) -= cnt; - free(tlines); } /* @@ -684,7 +680,7 @@ vredraw(register int p) { register int l; register line *tp; - char *temp; + char temp[LBSIZE]; bool anydl = 0; short oldhold = hold; @@ -701,7 +697,6 @@ vredraw(register int p) if (p < 0 /* || p > WECHO */) error(catgets(catd, 1, 221, "Internal error: vredraw")); - temp = smalloc(LBSIZE); /* * Trim the ragged edges (lines which are off the screen but * not yet logically discarded), save the current line, and @@ -795,7 +790,6 @@ vredraw(register int p) if (trace) tvliny(); #endif - 
free(temp); } /* @@ -851,7 +845,7 @@ vdellin(int p, int cnt, int l) void vadjDL(int p, int cnt) { - cell **tlines = smalloc(TUBELINES * sizeof *tlines); + cell *tlines[TUBELINES]; register int from, to; #ifdef ADEBUG @@ -863,14 +857,13 @@ vadjDL(int p, int cnt) * v7 compiler (released with phototypesetter for v6) * can't hack it. */ - copy(tlines, vtube, TUBELINES * sizeof *tlines); /*SASSIGN*/ + copy(tlines, vtube, sizeof vtube); /*SASSIGN*/ for (from = p + cnt, to = p; from <= WECHO; from++, to++) vtube[to] = tlines[from]; for (from = p; to <= WECHO; from++, to++) { vtube[to] = tlines[from]; vclrcell(vtube[to], WCOLS); } - free(tlines); } /* * Sync the screen, like redraw but more lazy and willing to leave @@ -903,7 +896,7 @@ void vsync1(register int p) { register int l; - char *temp; + char temp[LBSIZE]; register struct vlinfo *vp = &vlinfo[0]; short oldhold = hold; @@ -918,7 +911,6 @@ vsync1(register int p) } if (state == HARDOPEN || splitw) return; - temp = smalloc(LBSIZE); vscrap(); CP(temp, linebuf); if (vcnt == 0) @@ -968,7 +960,6 @@ vsync1(register int p) hold = oldhold; if (heldech) vclrech(0); - free(temp); } /* diff --git a/ex_version.c b/ex_version.c index 103d5a9..317fdd3 100644 --- a/ex_version.c +++ b/ex_version.c @@ -70,12 +70,12 @@ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * Sccsid @(#)ex_version.c 1.146 (gritter) 12/25/06 + * Sccsid @(#)ex_version.c 1.132 (gritter) 3/25/05 */ #include "ex.h" -static char *versionstring = "@(#)Version 4.0 (gritter) 12/25/06"; +static char *versionstring = "@(#)Version 4.0 (gritter) 3/25/05"; void printver(void) @@ -88,45 +88,3 @@ printver(void) #endif ); } -/* SLIST */ -/* -ex.c:static char sccsid[] = "@(#)ex.c 1.37 (gritter) 8/4/05"; -ex.h: * Sccsid @(#)ex.h 1.57 (gritter) 8/6/05 -ex_addr.c:static char sccsid[] = "@(#)ex_addr.c 1.11 (gritter) 8/4/05"; -ex_argv.h: * Sccsid @(#)ex_argv.h 1.9 (gritter) 8/4/05 -ex_cmds.c:static char sccsid[] = "@(#)ex_cmds.c 1.22 (gritter) 2/18/05"; -ex_cmds2.c:static char sccsid[] = "@(#)ex_cmds2.c 1.18 (gritter) 2/17/05"; -ex_cmdsub.c:static char sccsid[] = "@(#)ex_cmdsub.c 1.32 (gritter) 8/6/05"; -ex_data.c:static char sccsid[] = "@(#)ex_data.c 1.14 (gritter) 11/23/04"; -ex_extern.c:static char sccsid[] = "@(#)ex_extern.c 1.6 (gritter) 11/23/04"; -ex_get.c:static char sccsid[] = "@(#)ex_get.c 1.18 (gritter) 8/4/05"; -ex_io.c:static char sccsid[] = "@(#)ex_io.c 1.42 (gritter) 8/4/05"; -ex_proto.h: * Sccsid @(#)ex_proto.h 1.33 (gritter) 8/6/05 -ex_put.c:static char sccsid[] = "@(#)ex_put.c 1.35 (gritter) 12/25/06"; -ex_re.c:static char sccsid[] = "@(#)ex_re.c 1.60 (gritter) 8/6/05"; -ex_re.h: * Sccsid @(#)ex_re.h 1.24 (gritter) 8/4/05 -ex_set.c:static char sccsid[] = "@(#)ex_set.c 1.11 (gritter) 11/24/04"; -ex_subr.c:static char sccsid[] = "@(#)ex_subr.c 1.41 (gritter) 12/25/06"; -ex_tagio.c:static char sccsid[] = "@(#)ex_tagio.c 1.12 (gritter) 8/4/05"; -ex_temp.c:static char sccsid[] = "@(#)ex_temp.c 1.27 (gritter) 12/25/06"; -ex_temp.h: * Sccsid @(#)ex_temp.h 1.10 (gritter) 8/4/05 -ex_tty.c:static char sccsid[] = "@(#)ex_tty.c 1.30 (gritter) 8/4/05"; -ex_tty.h: * Sccsid @(#)ex_tty.h 1.14 (gritter) 8/4/05 -ex_tune.h: * Sccsid @(#)ex_tune.h 1.14 (gritter) 8/4/05 -ex_unix.c:static char sccsid[] = "@(#)ex_unix.c 1.17 (gritter) 8/4/05"; -ex_v.c:static char sccsid[] = 
"@(#)ex_v.c 1.19 (gritter) 8/4/05"; -ex_vadj.c:static char sccsid[] = "@(#)ex_vadj.c 1.16 (gritter) 8/6/05"; -ex_vget.c:static char sccsid[] = "@(#)ex_vget.c 1.31 (gritter) 8/6/05"; -ex_vis.h: * Sccsid @(#)ex_vis.h 1.22 (gritter) 8/6/05 -ex_vmain.c:static char sccsid[] = "@(#)ex_vmain.c 1.34 (gritter) 8/6/05"; -ex_voper.c:static char sccsid[] = "@(#)ex_voper.c 1.28 (gritter) 8/6/05"; -ex_vops.c:static char sccsid[] = "@(#)ex_vops.c 1.28 (gritter) 8/4/05"; -ex_vops2.c:static char sccsid[] = "@(#)ex_vops2.c 1.36 (gritter) 12/25/06"; -ex_vops3.c:static char sccsid[] = "@(#)ex_vops3.c 1.21 (gritter) 8/4/05"; -ex_vput.c:static char sccsid[] = "@(#)ex_vput.c 1.52 (gritter) 12/25/06"; -ex_vwind.c:static char sccsid[] = "@(#)ex_vwind.c 1.9 (gritter) 11/23/04"; -expreserve.c:static char sccsid[] UNUSED = "@(#)expreserve.c 1.23 (gritter) 11/27/04"; -exrecover.c:static char sccsid[] UNUSED = "@(#)exrecover.c 1.23 (gritter) 12/25/06"; -mapmalloc.c: * Sccsid @(#)mapmalloc.c 1.7 (gritter) 8/18/05 -printf.c:static char sccsid[] = "@(#)printf.c 1.15 (gritter) 12/1/04"; -*/ diff --git a/ex_vget.c b/ex_vget.c index 868c843..e3a50d0 100644 --- a/ex_vget.c +++ b/ex_vget.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_vget.c 1.31 (gritter) 8/6/05"; +static char sccsid[] = "@(#)ex_vget.c 1.29 (gritter) 2/15/05"; #endif #endif @@ -406,7 +406,7 @@ int readecho(int c) { register char *sc = cursor; - register int (*OP)(int, int); + register void (*OP)(int); bool waste; register int OPeek; diff --git a/ex_vis.h b/ex_vis.h index cc79433..d6fd99c 100644 --- a/ex_vis.h +++ b/ex_vis.h @@ -72,7 +72,7 @@ * * from ex_vis.h 7.4 (Berkeley) 5/31/85 * - * Sccsid @(#)ex_vis.h 1.22 (gritter) 8/6/05 + * @(#)ex_vis.h 1.18 (gritter) 3/24/05 */ /* @@ -99,13 +99,6 @@ var enum { HARDOPEN = 3 } bastate, state; -/* - * Maximum screen size in visual mode, dynamically set as needed. 
- */ -var short TUBELINES; -var short TUBECOLS; -var short TUBESIZE; - /* * The screen in visual and crtopen is of varying size; the basic * window has top basWTOP and basWLINES lines are thereby implied. @@ -147,14 +140,13 @@ struct vlinfo { short vdepth; /* Depth of displayed line */ /*mjm: was char */ short vflags; /* Is line potentially dirty ? */ }; -var struct vlinfo *vlinfo; +var struct vlinfo vlinfo[TUBELINES + 2]; #define DEPTH(c) (vlinfo[c].vdepth) #define LINE(c) (vlinfo[c].vliny) #define FLAGS(c) (vlinfo[c].vflags) #define VDIRT 1 -#define VLONG 2 /* Line does not fit on a single screen */ /* * Hacks to copy vlinfo structures around @@ -178,7 +170,7 @@ var short vcnt; * data itself. It is also rearranged during insert mode across line * boundaries to make incore work easier. */ -var cell **vtube; +var cell *vtube[TUBELINES]; var cell *vtube0; /* diff --git a/ex_vmain.c b/ex_vmain.c index 8bdfa3f..ac07f92 100644 --- a/ex_vmain.c +++ b/ex_vmain.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_vmain.c 1.34 (gritter) 8/6/05"; +static char sccsid[] = "@(#)ex_vmain.c 1.29 (gritter) 2/17/05"; #endif #endif @@ -100,11 +100,11 @@ vmain(void) cell esave[TUBECOLS]; char *oglobp; short d; - line *addr, *odot; + line *addr; int ind, nlput; int shouldpo = 0; int onumber = 0, olist = 0; - int (*OPline)(int, int) = NULL; + void (*OPline)(int) = NULL; int (*OPutchar)(int) = NULL; CLOBBGRD(c); @@ -514,22 +514,13 @@ reread: */ case CTRL('b'): vsave(); - odot = dot; if (one + vcline != dot && vcnt > 2) { addr = dot - vcline + 2 - (cnt-1)*basWLINES; forbid (addr <= zero); + dot = (line*)addr; vcnt = vcline = 0; - do { - dot = addr; - vzop(0, 0, '^'); - /* - * When a single line fills the - * entire screen, ^B can become - * a no-op without the loop. 
- */ - } while (dot == odot && --addr > zero); - } else - vzop(0, 0, '^'); + } + vzop(0, 0, '^'); continue; /* @@ -1273,7 +1264,7 @@ vremote(int cnt, void (*f)(int), int arg) void vsave(void) { - char *temp = smalloc(LBSIZE); + char temp[LBSIZE]; CP(temp, linebuf); if (FIXUNDO && vundkind == VCHNG || vundkind == VCAPU) { @@ -1299,13 +1290,10 @@ vsave(void) * almost always be in a read buffer so this may well avoid disk i/o. */ getDOT(); - if (strcmp(linebuf, temp) == 0) { - free(temp); + if (strcmp(linebuf, temp) == 0) return; - } strcLIN(temp); putmark(dot); - free(temp); } #undef forbid @@ -1379,13 +1367,7 @@ vzop(int hadcnt, int cnt, register int c) break; case '+': - if ((vtube[WLINES-1][0] == '~' && vtube[WLINES-1][1] == 0) || - dot == dol) { - forbid (addr >= dol); - } else { - if (addr > dol) - addr = dol; - } + forbid (addr >= dol); /* fall into ... */ case CR: diff --git a/ex_voper.c b/ex_voper.c index 5cb693e..d86b5d9 100644 --- a/ex_voper.c +++ b/ex_voper.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_voper.c 1.28 (gritter) 8/6/05"; +static char sccsid[] = "@(#)ex_voper.c 1.27 (gritter) 2/15/05"; #endif #endif @@ -715,7 +715,6 @@ errlab: wcursor = 0; if (readecho(c)) return; -lloop: if (!vglobp) vscandir[0] = genbuf[0]; oglobp = globp; @@ -791,12 +790,6 @@ slerr: if (state == CRTOPEN && addr != dot) vup1(); vupdown(addr - dot, NOSTR); - if (FLAGS(vcline)&VLONG && - addr == odot && - cursor == ocurs) { - cursor = NULL; - goto lloop; - } } return; } diff --git a/ex_vops.c b/ex_vops.c index f9a7aa5..4d28fac 100644 --- a/ex_vops.c +++ b/ex_vops.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_vops.c 1.28 (gritter) 8/4/05"; +static char sccsid[] = "@(#)ex_vops.c 1.26 (gritter) 1/13/05"; #endif #endif @@ -143,7 +143,7 @@ vundo ( register int cnt; register line *addr; register char *cp; - char *temp = smalloc(LBSIZE); + char temp[LBSIZE]; bool savenote; int (*OO)(int); short oldhold = hold; @@ -246,7 
+246,6 @@ vundo ( beep(); break; } - free(temp); } /* @@ -260,7 +259,7 @@ vmacchng(int fromvis) { line *savedot, *savedol; char *savecursor; - char *savelb; + char savelb[LBSIZE]; int nlines, more; /* register line *a1, *a2; */ /* char ch; */ /* DEBUG */ @@ -284,7 +283,6 @@ vmacchng(int fromvis) vch_mac = VC_ONECHANGE; break; case VC_ONECHANGE: - savelb = smalloc(LBSIZE); /* Save current state somewhere */ #ifdef TRACE vudump("before vmacchng hairy case"); @@ -332,7 +330,6 @@ vmacchng(int fromvis) #ifdef TRACE vudump("after vmacchng"); #endif - free(savelb); break; case VC_NOTINMAC: case VC_MANYCHANGE: diff --git a/ex_vops2.c b/ex_vops2.c index 48050ff..d7cd3fb 100644 --- a/ex_vops2.c +++ b/ex_vops2.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_vops2.c 1.36 (gritter) 12/25/06"; +static char sccsid[] = "@(#)ex_vops2.c 1.34 (gritter) 1/12/05"; #endif #endif @@ -611,7 +611,7 @@ vgetline(int cnt, char *gcursor, bool *aescaped, int commch) int x, y, iwhite, backsl=0; cell *iglobp; char cstr[2]; - int (*OO)(int) = Outchar; + int (*OO)() = Outchar; /* * Clear the output state and counters diff --git a/ex_vops3.c b/ex_vops3.c index a855f16..57cdebf 100644 --- a/ex_vops3.c +++ b/ex_vops3.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_vops3.c 1.21 (gritter) 8/4/05"; +static char sccsid[] = "@(#)ex_vops3.c 1.19 (gritter) 1/2/05"; #endif #endif @@ -118,7 +118,7 @@ llfind(bool pastatom, int cnt, void (*f)(int), line *limit) register int c; #endif register int rc = 0; - char *save = smalloc(LBSIZE); + char save[LBSIZE]; /* * Initialize, saving the current line buffer state @@ -284,7 +284,6 @@ begin: #endif ret: strcLIN(save); - free(save); return (rc); } @@ -425,7 +424,7 @@ lmatchp(line *addr) void lsmatch(char *cp) { - char *save = smalloc(LBSIZE); + char save[LBSIZE]; register char *sp = save; register char *scurs = cursor; @@ -458,7 +457,6 @@ lsmatch(char *cp) wdot = 0; wcursor = 0; cursor = scurs; - free(save); 
} int @@ -676,7 +674,7 @@ vswitch(int cnt) mbuf[1+n1] = '\0'; macpush(mbuf, 1); } else { /* cnt > 1 */ - char *mbuf = smalloc(MAXDIGS + cnt*(mb_cur_max+1) + 5); + char *mbuf = malloc(MAXDIGS + cnt*(mb_cur_max+1) + 5); register char *p = &mbuf[MAXDIGS + 1]; int num, n0, n1, m; diff --git a/ex_vput.c b/ex_vput.c index 1851b67..f2c9c3f 100644 --- a/ex_vput.c +++ b/ex_vput.c @@ -73,7 +73,7 @@ #ifndef lint #ifdef DOSCCS -static char sccsid[] = "@(#)ex_vput.c 1.52 (gritter) 12/25/06"; +static char sccsid[] = "@(#)ex_vput.c 1.49 (gritter) 2/15/05"; #endif #endif @@ -223,7 +223,7 @@ vclrech(bool didphys) splitw = 0; didphys = 1; } - if (didphys && vtube) + if (didphys) vclrcell(vtube[WECHO], WCOLS); heldech = 0; } @@ -893,7 +893,7 @@ vnpins(int dosync) e = vglitchup(vcline, d); vigoto(e, 0); vclreol(); if (dosync) { - int (*Ooutchar)(int) = Outchar; + int (*Ooutchar)() = Outchar; Outchar = vputchar; vsync(e + 1); Outchar = Ooutchar; @@ -1454,19 +1454,14 @@ def: } } #ifdef MB - if (mb_cur_max > 1) { - if ((d = colsc(c&TRIM&~MULTICOL)) > 1) { + if (mb_cur_max > 1 && (d = colsc(c&TRIM&~MULTICOL)) > 1) { + if ((hold & HOLDPUPD) == 0) + *tp |= MULTICOL; + while (--d) { if ((hold & HOLDPUPD) == 0) - *tp |= MULTICOL; - while (--d) { - if ((hold & HOLDPUPD) == 0) - *++tp = MULTICOL; - destcol++; - outcol++; - } - } else if (d == 0) { - destcol--; - outcol--; + *++tp = MULTICOL; + destcol++; + outcol++; } } #endif /* MB */ diff --git a/exrecover.c b/exrecover.c index eda7870..cca2250 100644 --- a/exrecover.c +++ b/exrecover.c @@ -83,7 +83,7 @@ char *copyright = "@(#) Copyright (c) 1980 Regents of the University of California.\n\ All rights reserved.\n"; #endif -static char sccsid[] UNUSED = "@(#)exrecover.c 1.23 (gritter) 12/25/06"; +static char sccsid[] UNUSED = "@(#)exrecover.c 1.21 (gritter) 11/27/04"; #endif /* from exrecover.c 7.9.2 (2.11BSD) 1996/10/26 */ @@ -210,8 +210,6 @@ main(int argc, char *argv[]) #ifdef VMUNIX poolsbrk(0); #endif - linebuf = calloc(LBSIZE = 
BUFSIZ<4096?4096:BUFSIZ, sizeof *linebuf); - genbuf = calloc(MAXBSIZE, sizeof *genbuf); #ifdef LANGMSG setlocale(LC_MESSAGES, ""); catd = catopen(CATNAME, NL_CAT_LOCALE); @@ -428,7 +426,7 @@ listfiles(char *dirname) xfprintf(xstderr, catgets(catd, 2, 6, "No files saved.\n")); return; } - qsort(&svbuf[0], ecount, sizeof svbuf[0], (int(*)(const void *, const void *)) qucmp); + qsort(&svbuf[0], ecount, sizeof svbuf[0], (int(*)()) qucmp); for (fp = &svbuf[0]; fp < &svbuf[ecount]; fp++) { cp = ctime(&fp->sf_time); cp[10] = 0; @@ -449,7 +447,7 @@ enter(struct svfile *fp, char *fname, int count) { register char *cp, *cp2; register struct svfile *f, *fl; - time_t curtime; + time_t curtime, itol(); f = 0; if (count >= NENTRY) { @@ -841,15 +839,14 @@ getblock(line atl, int iof) return (obuff + off); if (iof == READ) { if (ichanged) - blkio(iblock, ibuff, - (ssize_t(*)(int, void *, size_t))write); + blkio(iblock, ibuff, (ssize_t(*)())write); ichanged = 0; iblock = bno; - blkio(bno, ibuff, (ssize_t(*)(int, void *, size_t))read); + blkio(bno, ibuff, (ssize_t(*)())read); return (ibuff + off); } if (oblock >= 0) - blkio(oblock, obuff, (ssize_t(*)(int, void *, size_t))write); + blkio(oblock, obuff, (ssize_t(*)())write); oblock = bno; return (obuff + off); } diff --git a/libuxre/COPYING.LGPL b/libuxre/COPYING.LGPL new file mode 100644 index 0000000..b1e3f5a --- /dev/null +++ b/libuxre/COPYING.LGPL @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] 
+ + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. 
+ + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. 
It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. 
This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. 
You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) 
+ +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/libuxre/Makefile b/libuxre/Makefile new file mode 100644 index 0000000..46d7320 --- /dev/null +++ b/libuxre/Makefile @@ -0,0 +1,12 @@ +CFLAGS = $(COPT) $(RPMCFLAGS) -I.
+OBJS = bracket.o _collelem.o _collmult.o regcomp.o regdfa.o regerror.o regexec.o regfree.o regnfa.o regparse.o stubs.o + +.c.o: ; $(CC) $(CFLAGS) -c $< + +all: libuxre.a + +libuxre.a: $(OBJS) + ar cr libuxre.a $(OBJS) + +clean: + rm -f libuxre.a $(OBJS) core diff --git a/libuxre/NOTES b/libuxre/NOTES new file mode 100644 index 0000000..19aedf1 --- /dev/null +++ b/libuxre/NOTES @@ -0,0 +1,14 @@ +Notes for the modified 'UNIX(R) Regular Expression Library' +============================================================ + +The code this is based on was released by Caldera as 'osutils-0.1a' +and is available at <http://unixtools.sourceforge.net/>. Notable +changes include: + +- Support for multibyte characters was enabled again. +- Support for traditional extended regular expression syntax was added. +- Fix: With REG_ICASE, [B-z] matches 'A', 'a', and '[' according to + POSIX.2. +- Some speed improvements. + + Gunnar Ritter 9/22/03 diff --git a/libuxre/_collelem.c b/libuxre/_collelem.c new file mode 100644 index 0000000..c5dbb05 --- /dev/null +++ b/libuxre/_collelem.c @@ -0,0 +1,119 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)_collelem.c 1.4 (gritter) 10/18/03 + */ +/* UNIX(R) Regular Expression Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to:
+ *   Free Software Foundation, Inc.
+ *   59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*	#include "synonyms.h"	*/
+#include "colldata.h"
+#include <stddef.h>	/* size_t */
+
+#define CCE(p)	((const CollElem *)(p))
+#define CCM(p)	((const CollMult *)(p))
+
+LIBUXRE_STATIC const CollElem *	/* table entry, spare, ELEM_ENCODED or ELEM_BADCHAR */
+libuxre_collelem(struct lc_collate *col, CollElem *spare, wchar_t wc)
+{
+	const char *tbl;
+	size_t hi, lo, cur;
+	const CollMult *cmp;
+	const CollElem *cep;
+	long diff;
+	int sz;
+
+	/*
+	* ELEM_ENCODED is returned when the collation is entirely
+	* based on the encoded value of the character.
+	*/
+	if (col == 0 || col->flags & CHF_ENCODED
+		|| (tbl = (const char *)col->maintbl) == 0)
+	{
+		return ELEM_ENCODED;
+	}
+	if ((wuchar_type)wc <= UCHAR_MAX)	/* low codes index the table directly */
+	{
+	indexed:;
+		cep = CCE(&tbl[(wuchar_type)wc * col->elemsize]);
+		if (cep->weight[0] == WGHT_SPECIAL)
+			return ELEM_BADCHAR;
+		return cep;
+	}
+	if (col->flags & CHF_INDEXED)
+	{
+		if ((wuchar_type)wc >= col->nmain)
+			return ELEM_BADCHAR;
+		goto indexed;
+	}
+	/*
+	* Binary search for a match.  Could speed up the search if
+	* some interpolation was used, but keep it simple for now.
+	* Note that this is actually a table of CollMult's.
+	*
+	* To save space in the file, sequences of similar elements
+	* are sometimes compressed into a single CollMult that
+	* describes many entries.  This is denoted by a subnbeg
+	* with the SUBN_SPECIAL bit set.  The rest of the bits give
+	* the range covered by this entry.
+	*/
+	sz = col->elemsize + (sizeof(CollMult) - sizeof(CollElem));	/* stride of one CollMult record */
+	tbl += (1 + UCHAR_MAX) * col->elemsize;	/* step over the directly indexed entries */
+	lo = 0;
+	hi = col->nmain - UCHAR_MAX;
+	while (lo < hi)
+	{
+		if ((cur = (hi + lo) >> 1) < lo)   /* hi+lo overflowed */
+			cur |= ~(~(size_t)0 >> 1); /* lost high order bit */
+		cmp = CCM(&tbl[cur * sz]);
+		if ((diff = wc - cmp->ch) < 0)
+			hi = cur;
+		else if (cmp->elem.subnbeg & SUBN_SPECIAL)
+		{
+			if (diff > (long)(cmp->elem.subnbeg & ~SUBN_SPECIAL))
+				lo = cur + 1;
+			else /* create an entry from the sequence in spare */
+			{
+				spare->multbeg = cmp->elem.multbeg;
+				spare->subnbeg = 0;
+				spare->weight[0] = cmp->elem.weight[0] + diff;	/* offset within the run */
+				for (lo = 1; lo < col->nweight; lo++)	/* lo is reused as an index; we return below */
+				{
+					wuchar_type w;
+
+					if ((w = cmp->elem.weight[lo])
+						== WGHT_SPECIAL)
+					{
+						w = spare->weight[0];
+					}
+					spare->weight[lo] = w;
+				}
+				return spare;
+			}
+		}
+		else if (diff == 0)
+			return &cmp->elem;
+		else
+			lo = cur + 1;
+	}
+	return ELEM_BADCHAR;
+}
diff --git a/libuxre/_collmult.c b/libuxre/_collmult.c
new file mode 100644
index 0000000..7a199b3
--- /dev/null
+++ b/libuxre/_collmult.c
@@ -0,0 +1,55 @@
+/*
+ * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
+ *
+ * Sccsid @(#)_collmult.c	1.4 (gritter) 9/22/03
+ */
+/*	UNIX(R) Regular Expression Library
+ *
+ *	Note: Code is released under the GNU LGPL
+ *
+ *	Copyright (C) 2001 Caldera International, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to:
+ *   Free Software Foundation, Inc.
+ *   59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*	#include "synonyms.h"	*/
+#include "colldata.h"
+#include <stddef.h>	/* size_t */
+
+#define CCM(p)	((const CollMult *)(p))
+
+LIBUXRE_STATIC const CollElem *	/* entry for wc in cep's list, or ELEM_BADCHAR */
+libuxre_collmult(struct lc_collate *col, const CollElem *cep, wchar_t wc)
+{
+	const char *tbl;
+	size_t sz;
+	w_type ch;
+
+	if (col == 0 || cep->multbeg == 0
+		|| (tbl = (const char *)col->multtbl) == 0)
+	{
+		return ELEM_BADCHAR;
+	}
+	sz = col->elemsize + (sizeof(CollMult) - sizeof(CollElem));	/* stride of one CollMult record */
+	tbl += sz * cep->multbeg;	/* first record of the list that cep refers to */
+	while ((ch = CCM(tbl)->ch) != wc)	/* wc == 0 therefore finds the terminator itself */
+	{
+		if (ch == 0)
+			return ELEM_BADCHAR;	/* end of list */
+		tbl += sz;
+	}
+	return &CCM(tbl)->elem;
+}
diff --git a/libuxre/bracket.c b/libuxre/bracket.c
new file mode 100644
index 0000000..bc31b23
--- /dev/null
+++ b/libuxre/bracket.c
@@ -0,0 +1,829 @@
+/*
+ * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
+ *
+ * Sccsid @(#)bracket.c	1.14 (gritter) 10/18/03
+ */
+/*	UNIX(R) Regular Expression Library
+ *
+ *	Note: Code is released under the GNU LGPL
+ *
+ *	Copyright (C) 2001 Caldera International, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to:
+ *   Free Software Foundation, Inc.
+ *   59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*	#include "synonyms.h"	*/
+#include <stdlib.h>	/* realloc, free */
+#include <ctype.h>	/* isxdigit, isdigit, isupper */
+#include <string.h>	/* memcpy */
+#include "re.h"
+
+/*
+* Build and match the [...] part of REs.
+*
+* In general, each compiled bracket construct holds a set of mapped
+* wide character values and a set of character classifications.
+* The mapping applied (when the current LC_COLLATE is not CHF_ENCODED)
+* is the "basic" weight (cep->weight[0]); otherwise the actual wide
+* character is used.
+*
+* To support simplified range handling, this code assumes that a w_type,
+* a signed integer type, can hold all valid basic weight values (as well
+* as all wide character values for CHF_ENCODED locales) and that these
+* are all positive.  Negative values indicate error conditions (BKT_*);
+* zero (which must be the same as WGHT_IGNORE) indicates success, but
+* that the item installed is not a range endpoint.
+*/
+
+static int	/* 0 on success, BKT_ESPACE if the overflow array cannot grow */
+addwide(Bracket *bp, wchar_t ord)
+{
+	unsigned int nw;
+
+	if ((nw = bp->nwide) < NWIDE)
+		bp->wide[nw] = ord;
+	else
+	{
+		wchar_t *grown = bp->exwide;	/* old block stays owned by *bp if realloc fails */
+		if (nw % NWIDE == 0 && (grown =
+			realloc(bp->exwide, nw * sizeof(wchar_t))) == 0)
+			return BKT_ESPACE;
+		bp->exwide = grown;
+		nw -= NWIDE;	/* exwide[] continues where wide[] ends */
+		bp->exwide[nw] = ord;
+	}
+	bp->nwide++;
+	return 0;
+}
+
+#if USHRT_MAX == 65535	/* have 16 bits */
+#define PLIND(n)	((n) >> 4)
+#define PLBIT(n)	(1 << ((n) & 0xf))
+#else
+#define PLIND(n)	((n) / CHAR_BIT)
+#define PLBIT(n)	(1 << ((n) % CHAR_BIT))
+#endif
+
+#define RANGE	((wchar_t)'-')	/* separates wide chars in ranges */
+
+static int	/* 0 on success, else a negative BKT_* code */
+addrange(Bracket *bp, wchar_t ord, w_type prev)
+{
+	int ret;
+
+	if (prev > 0 && prev != ord) /* try for range */
+	{
+		if (prev > ord)
+		{
+			if (bp->flags & BKT_ODDRANGE)	/* prev only - done */
+				return 0;
+			else if ((bp->flags & BKT_BADRANGE) == 0)
+				return BKT_ERANGE;
+		}
+		else
+		{
+			if (++prev <= UCHAR_MAX) /* "prev" already there */
+			{
+				do
+				{
+					bp->byte[PLIND(prev)] |= PLBIT(prev);
+					if (prev == ord)
+						return 0;
+				} while (++prev <= UCHAR_MAX);
+			}
+			if ((ret = addwide(bp, prev)) != 0)
+				return ret;
+			if (++prev > ord)
+				return 0;
+			if (prev < ord && (ret = addwide(bp, RANGE)) != 0)	/* RANGE marker only when values lie between */
+				return ret;
+			return addwide(bp, ord);
+		}
+	}
+	if (ord <= UCHAR_MAX)
+	{
+		bp->byte[PLIND(ord)] |= PLBIT(ord);
+		return 0;
+	}
+	if (prev == ord) /* don't bother */
+		return 0;
+	return addwide(bp, ord);
+}
+
+static w_type	/* the value installed (a possible range start), or a negative BKT_* code */
+place(Bracket *bp, wchar_t wc, w_type prev, int mb_cur_max)
+{
+	const CollElem *cep;
+	CollElem spare;
+	int ret;
+
+	if ((cep = libuxre_collelem(bp->col, &spare, wc)) != ELEM_ENCODED)
+	{
+		if (cep == ELEM_BADCHAR)
+			return BKT_BADCHAR;
+		wc = cep->weight[0];	/* sets are stored by basic weight */
+	}
+	if ((ret = addrange(bp, wc, prev)) != 0)
+		return ret;
+	return wc;
+}
+
+#ifndef CHARCLASS_NAME_MAX
+#   define CHARCLASS_NAME_MAX	127
+#endif
+
+static w_type	/* 0 on success, BKT_ECTYPE or BKT_ESPACE on failure */
+chcls(Bracket *bp, const unsigned char *s, int n)
+{
+	char clsstr[CHARCLASS_NAME_MAX + 1];
+	unsigned int nt;
+	wctype_t wct;
+
+	if (n > CHARCLASS_NAME_MAX)
+		return BKT_ECTYPE;
+	(void)memcpy(clsstr, s, n);
+	clsstr[n] = '\0';
+	if ((wct = wctype(clsstr)) == 0)
+		return BKT_ECTYPE;
+	if ((nt = bp->ntype) < NTYPE)
+		bp->type[nt] = wct;
+	else
+	{
+		wctype_t *grown = bp->extype;	/* old block stays owned by *bp if realloc fails */
+		if (nt % NTYPE == 0 && (grown =
+			realloc(bp->extype, nt * sizeof(wctype_t))) == 0)
+			return BKT_ESPACE;
+		bp->extype = grown;
+		nt -= NTYPE;	/* extype[] continues where type[] ends */
+		bp->extype[nt] = wct;
+	}
+	bp->ntype++;
+	return 0;	/* cannot be end point of a range */
+}
+
+	/*
+	* The purpose of mcce() and its Mcce structure is to locate
+	* the next full collation element from "wc" and "s".  It is
+	* called both at compile and execute time.  These two differ
+	* primarily in that at compile time there is an exact number
+	* of bytes to be consumed, while at execute time the longest
+	* valid collation element is to be found.
+	*
+	* When BKT_ONECASE is set, MCCEs become particularly messy.
+	* There is no guarantee that all possible combinations of
+	* upper/lower case are defined as MCCEs.
Thus, this code
+	* tries both lower- and uppercase (in that order) for each
+	* character that might be part of an MCCE.
+	*/
+
+typedef struct
+{
+	const unsigned char	*max;	/* restriction by caller */
+	const unsigned char	*aft;	/* longest successful */
+	Bracket			*bp;	/* readonly */
+	struct lc_collate	*col;	/* readonly */
+	const CollElem		*cep;	/* entry matching longest */
+	wchar_t			ch;	/* initial character (if any) */
+	w_type			wc;	/* character matching "aft" */
+} Mcce;
+
+static int	/* 0, or BKT_ILLSEQ; the result proper is left in *mcp */
+mcce(Mcce *mcp, const CollElem *cep, const unsigned char *s, int mb_cur_max,
+		int compile_time)
+{
+	const CollElem *nxt;
+	CollElem spare;	/* NOTE(review): mcp->cep may be left pointing here after return - confirm */
+	w_type ch, wc;
+	int i;
+
+	/*
+	* Get next character.
+	*/
+	if ((wc = mcp->ch) != '\0')
+	{
+		mcp->ch = '\0';
+	}
+	else if (ISONEBYTE(wc = *s++))
+	{
+		if (wc == '\0')
+			return 0;
+	}
+	else if ((i = libuxre_mb2wc(&wc, s)) > 0)
+	{
+		s += i;
+		if (mcp->max != 0 && s > mcp->max)
+			return 0;
+	}
+	else if (i < 0)
+		return BKT_ILLSEQ;
+	/*
+	* Try out this character as part of an MCCE.
+	* If BKT_ONECASE is set, this code tries both the lower- and
+	* uppercase version, continuing if it matches so far.
+	*/
+	ch = wc;
+	if (mcp->bp->flags & BKT_ONECASE)
+	{
+		if ((wc = to_lower(wc)) == ch)
+			ch = to_upper(wc);
+	}
+	for (;;) /* at most twice */
+	{
+		if (cep == ELEM_BADCHAR) /* first character */
+		{
+			if ((nxt = libuxre_collelem(mcp->col, &spare, wc))
+				== ELEM_ENCODED
+				|| (mcp->col->flags & CHF_MULTICH) == 0
+				|| s == mcp->max)
+			{
+				mcp->aft = s;
+				mcp->cep = nxt;
+				mcp->wc = wc;
+				break;
+			}
+		}
+		else
+		{
+			nxt = libuxre_collmult(mcp->col, cep, wc);
+		}
+		if (nxt != ELEM_BADCHAR)
+		{
+			/*
+			* Okay so far.  Record this collating element
+			* if it's really one (not WGHT_IGNORE) and
+			* we've reached a new high point or it's the
+			* first match.
+			*
+			* If there's a possibility for more, call mcce()
+			* recursively for the subsequent characters.
+			*/
+			if (nxt->weight[0] != WGHT_IGNORE
+				&& (mcp->aft < s || mcp->cep == ELEM_BADCHAR))
+			{
+				mcp->aft = s;
+				mcp->cep = nxt;
+				mcp->wc = wc;
+			}
+			if (nxt->multbeg != 0
+				&& (mcp->max == 0 || s < mcp->max))
+			{
+				if ((i = mcce(mcp, nxt, s, mb_cur_max,
+					compile_time)) != 0)
+					return i;
+			}
+		}
+		if (wc == ch)
+			break;
+		wc = ch;	/* second pass: the other case */
+	}
+	return 0;
+}
+
+static w_type	/* last value installed (a possible range start), or a negative BKT_* code */
+eqcls(Bracket *bp, const unsigned char *s, int n, w_type prev, int mb_cur_max)
+{
+	w_type last;
+	Mcce mcbuf;
+	int err;
+
+	mcbuf.max = &s[n];
+	mcbuf.aft = &s[0];
+	mcbuf.bp = bp;
+	mcbuf.col = bp->col;
+	mcbuf.cep = ELEM_BADCHAR;
+	mcbuf.ch = '\0';
+	if ((err = mcce(&mcbuf, ELEM_BADCHAR, s, mb_cur_max, 1)) != 0)
+		return err;
+	if (mcbuf.cep == ELEM_BADCHAR || mcbuf.aft != mcbuf.max)	/* all n bytes must form one element */
+		return BKT_EEQUIV;
+	last = mcbuf.wc;
+	if (mcbuf.cep != ELEM_ENCODED && mcbuf.col->nweight > 1)
+	{
+		const CollElem *cep;
+
+		/*
+		* The first and last weight[0] values for equivalence
+		* classes are stuffed into the terminator for the
+		* multiple character lists.  If these values are
+		* scattered (elements that are not part of this
+		* equivalence class have weight[0] values between the
+		* two end points), then SUBN_SPECIAL is placed in
+		* this terminator.  Note that weight[1] of the
+		* terminator must be other than WGHT_IGNORE, too.
+		*/
+		last = mcbuf.cep->weight[0];
+		if ((cep = libuxre_collmult(bp->col, mcbuf.cep, 0))
+			!= ELEM_BADCHAR
+			&& cep->weight[1] != WGHT_IGNORE)
+		{
+			last = cep->weight[1];
+			if (cep->subnbeg == SUBN_SPECIAL)
+			{
+				unsigned int nq;
+
+				/*
+				* Permit ranges up to the first and
+				* after the last.
+				*/
+				if (prev > 0 && prev != cep->weight[0]
+					&& (prev = addrange(bp,
+					cep->weight[0], prev)) != 0)
+				{
+					return prev;
+				}
+				/*
+				* Record the equivalence class by storing
+				* the primary weight.
+				*/
+				if ((nq = bp->nquiv) < NQUIV)
+					bp->quiv[nq] = mcbuf.cep->weight[1];
+				else
+				{
+					wuchar_type *grown = bp->exquiv;	/* stays owned by *bp if realloc fails */
+
+					if (nq % NQUIV == 0 && (grown =
+						realloc(bp->exquiv,
+						nq * sizeof(wuchar_type))) == 0)
+						return BKT_ESPACE;	/* negative BKT_* code, as addwide() and chcls() return */
+					bp->exquiv = grown;
+					nq -= NQUIV;
+					bp->exquiv[nq] = mcbuf.cep->weight[1];
+				}
+				bp->nquiv++;
+				return last;
+			}
+			mcbuf.cep = cep;
+		}
+		mcbuf.wc = mcbuf.cep->weight[0];
+	}
+	/*
+	* Determine range, if any, to install.
+	*
+	* If there's a pending low (prev > 0), then try to use it.
+	*
+	* Otherwise, try to use mcbuf.wc as the low end of the range.
+	* Since addrange() assumes that the low point has already been
+	* placed, we try to fool it by using a prev of one less than
+	* mcbuf.wc.  But, if that value would not look like a valid
+	* low point of a range, we have to explicitly place mcbuf.wc.
+	*/
+	if (prev <= 0 && (prev = mcbuf.wc - 1) <= 0)
+	{
+		if ((prev = addrange(bp, mcbuf.wc, 0)) != 0)
+			return prev;
+	}
+	if ((mcbuf.wc = addrange(bp, last, prev)) != 0)
+		return mcbuf.wc;
+	return last;
+}
+
+static w_type	/* value installed (a possible range start), or a negative BKT_* code */
+clsym(Bracket *bp, const unsigned char *s, int n, w_type prev, int mb_cur_max)
+{
+	Mcce mcbuf;
+	int err;
+
+	mcbuf.max = &s[n];
+	mcbuf.aft = &s[0];
+	mcbuf.bp = bp;
+	mcbuf.col = bp->col;
+	mcbuf.cep = ELEM_BADCHAR;
+	mcbuf.ch = '\0';
+	if ((err = mcce(&mcbuf, ELEM_BADCHAR, s, mb_cur_max, 1)) != 0)
+		return err;
+	if (mcbuf.cep == ELEM_BADCHAR || mcbuf.aft != mcbuf.max)	/* all n bytes must form one element */
+		return BKT_ECOLLATE;
+	if (mcbuf.cep != ELEM_ENCODED)
+		mcbuf.wc = mcbuf.cep->weight[0];
+	if ((err = addrange(bp, mcbuf.wc, prev)) != 0)
+		return err;
+	return mcbuf.wc;
+}
+
+	/*
+	* Scans the rest of a bracket construction within a regular
+	* expression and fills in a description for it.
+	* The leading [ and the optional set complement indicator
+	* were handled already by the caller.
+	* Returns:
+	*	<0 error (a BKT_* value)
+	*	>0 success; equals how many bytes were scanned.
+	*/
+LIBUXRE_STATIC int
+libuxre_bktmbcomp(Bracket *bp, const unsigned char *pat0,
+		int flags, int mb_cur_max)
+{
+	static const Bracket zero = {0};
+	const unsigned char *pat = pat0;
+	struct lc_collate *savecol;
+	w_type n, wc, prev = 0;
+
+	/*
+	* Set represented set to empty.  Easiest to copy an empty
+	* version over the caller's, (re)setting col and flags.
+	*/
+	savecol = bp->col;
+	*bp = zero;
+	bp->col = savecol;
+	bp->flags = flags
+		& (BKT_NEGATED | BKT_ONECASE | BKT_NOTNL | BKT_BADRANGE |
+				BKT_ODDRANGE);
+	/*
+	* Handle optional "empty" brackets; typically only used
+	* in combination with BKT_QUOTE or BKT_ESCAPE.
+	*/
+	if ((wc = *pat) == ']' && (flags & BKT_EMPTY) != 0)
+		return 1;
+	/*
+	* Populate *bp.
+	*/
+	for (;; prev = n)	/* n > 0 marks a pending range start for the next item */
+	{
+		switch (wc)
+		{
+		case '\0':
+		ebrack:;
+			n = BKT_EBRACK;
+			goto err;
+		case '\n':
+			if (flags & BKT_NLBAD)
+				goto ebrack;
+			goto regular;
+		case '/':
+			if (flags & BKT_SLASHBAD)
+				goto ebrack;
+			goto regular;
+		case '\\':
+			if ((flags & (BKT_ESCAPE | BKT_QUOTE
+				| BKT_ESCNL | BKT_ESCSEQ)) == 0)
+			{
+				goto regular;
+			}
+			switch (wc = *++pat)
+			{
+			default:
+			noesc:;
+				if ((flags & BKT_ESCAPE) == 0)
+				{
+					wc = '\\';
+					pat--;
+				}
+				break;
+			case '\\':
+			case ']':
+			case '-':
+			case '^':
+				if ((flags & BKT_QUOTE) == 0)
+					goto noesc;
+				break;
+			case 'a':
+				if ((flags & BKT_ESCSEQ) == 0 ||
+						(flags & BKT_OLDESC))
+					goto noesc;
+				wc = '\a';
+				break;
+			case 'b':
+				if ((flags & BKT_ESCSEQ) == 0)
+					goto noesc;
+				wc = '\b';
+				break;
+			case 'f':
+				if ((flags & BKT_ESCSEQ) == 0)
+					goto noesc;
+				wc = '\f';
+				break;
+			case 'n':
+				if ((flags & (BKT_ESCSEQ | BKT_ESCNL)) == 0)
+					goto noesc;
+				wc = '\n';
+				break;
+			case 'r':
+				if ((flags & BKT_ESCSEQ) == 0)
+					goto noesc;
+				wc = '\r';
+				break;
+			case 't':
+				if ((flags & BKT_ESCSEQ) == 0)
+					goto noesc;
+				wc = '\t';
+				break;
+			case 'v':
+				if ((flags & BKT_ESCSEQ) == 0 ||
+						(flags & BKT_OLDESC))
+					goto noesc;
+				wc = '\v';
+				break;
+			case 'x':
+				if ((flags & BKT_ESCSEQ) == 0 ||
+						(flags & BKT_OLDESC))
+					goto noesc;
+				if (!isxdigit(wc = *++pat))
+				{
+					pat--;
+					goto noesc;
+				}
+				/*
+				* Take as many hex digits as possible,
+				* ignoring overflows.
+				* Any positive result is okay.
+				*/
+				n = 0;
+				do
+				{
+					if (isdigit(wc))
+						wc -= '0';
+					else if (isupper(wc))
+						wc -= 'A' - 10;	/* 'A'..'F' -> 10..15 */
+					else
+						wc -= 'a' - 10;	/* 'a'..'f' -> 10..15 */
+					n <<= 4;
+					n |= wc;
+				} while (isxdigit(wc = *++pat));
+				pat--;	/* back onto the last hex digit */
+				if ((wc = n) <= 0)
+				{
+					n = BKT_BADESC;
+					goto err;
+				}
+				break;
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+				if ((flags & BKT_ESCSEQ) == 0 ||
+						(flags & BKT_OLDESC))
+					goto noesc;
+				/*
+				* For compatibility (w/awk),
+				* permit "octal" 8 and 9.
+				*/
+				n = wc - '0';
+				if ((wc = *++pat) >= '0' && wc <= '9')
+				{
+					n <<= 3;
+					n += wc - '0';
+					if ((wc = *++pat) >= '0' && wc <= '9')
+					{
+						n = (n << 3) + (wc - '0');
+						pat++;	/* third digit was consumed: cancel the pat-- below */
+					}
+				}
+				pat--;	/* back onto the last digit consumed */
+				if ((wc = n) <= 0)
+				{
+					n = BKT_BADESC;
+					goto err;
+				}
+				break;
+			}
+			goto regular;
+		case '[':
+			if (((wc = *++pat) == ':' || wc == '=' || wc == '.') &&
+					(flags & BKT_NOI18N) == 0)
+			{
+				n = 0;	/* counts the bytes between the delimiters */
+				while (*++pat != wc || pat[1] != ']')
+				{
+					if (*pat == '\0')
+					{
+					badpat:;
+						n = BKT_BADPAT;
+						goto err;
+					}
+					else if (*pat == '/')
+					{
+						if (flags & BKT_SLASHBAD)
+							goto badpat;
+					}
+					else if (*pat == '\n')
+					{
+						if (flags & BKT_NLBAD)
+							goto badpat;
+					}
+					n++;
+				}
+				if (n == 0)
+				{
+					n = BKT_EMPTYSUBBKT;
+					goto err;
+				}
+				if (wc == ':')
+					n = chcls(bp, &pat[-n], n);
+				else if (wc == '=')
+					n = eqcls(bp, &pat[-n], n, prev,
+							mb_cur_max);
+				else /* wc == '.' */
+					n = clsym(bp, &pat[-n], n, prev,
+							mb_cur_max);
+				pat++;	/* onto the closing ']' of the sub-bracket */
+				break;
+			}
+			wc = '[';
+			pat--;
+			goto regular;
+		default:
+			if (!ISONEBYTE(wc) &&
+					(n = libuxre_mb2wc(&wc, pat + 1)) > 0)
+				pat += n;
+		regular:;
+			n = place(bp, wc, prev, mb_cur_max);
+			break;
+		}
+		if (n < 0) {
+			n = BKT_ILLSEQ;	/* NOTE(review): replaces the specific BKT_* code from above - confirm intended */
+			goto err;
+		}
+		if ((wc = *++pat) == ']')
+			break;
+		if (wc == '-' && n != 0)
+		{
+			if (prev == 0 || (flags & BKT_SEPRANGE) == 0)
+			{
+				if ((wc = *++pat) != ']')
+					continue; /* valid range */
+				wc = '-';
+				pat--;
+			}
+		}
+		n = 0;		/* no range this time */
+	}
+	return pat - pat0 + 1;
+err:;
+	libuxre_bktfree(bp);
+	return n;
+}
+
+LIBUXRE_STATIC void	/* releases the three overflow arrays; safe to call twice */
+libuxre_bktfree(Bracket *bp)
+{
+	free(bp->extype);	/* free(0) is a no-op */
+	bp->extype = 0;
+	free(bp->exquiv);
+	bp->exquiv = 0;
+	free(bp->exwide);
+	bp->exwide = 0;
+}
+
+LIBUXRE_STATIC int	/* bytes consumed after str on a match, -1 on no match */
+libuxre_bktmbexec(Bracket *bp, wchar_t wc,
+		const unsigned char *str, int mb_cur_max)
+{
+	unsigned int i;
+	wchar_t lc, uc;
+	Mcce mcbuf;
+
+	mcbuf.aft = str; /* in case of match in character classes */
+	mcbuf.ch = wc;
+	/*
+	* First: check the single wc against any character classes.
+	* Since multiple character collating elements are not part
+	* of this world, they don't apply here.
+	*/
+	if ((i = bp->ntype) != 0)
+	{
+		wctype_t *wctp = &bp->type[0];
+
+		if (bp->flags & BKT_ONECASE)
+		{
+			if ((wc = to_lower(wc)) == mcbuf.ch)
+				mcbuf.ch = to_upper(wc);
+		}
+		for (;;)
+		{
+			if (iswctype(mb_cur_max==1?btowc(wc):wc, *wctp))
+				goto match;
+			if (wc != mcbuf.ch &&
+			    iswctype(mb_cur_max==1?btowc(mcbuf.ch):mcbuf.ch,
+			    *wctp))
+				goto match;
+			if (--i == 0)
+				break;
+			if (++wctp == &bp->type[NTYPE])	/* continue in the overflow array */
+				wctp = &bp->extype[0];
+		}
+	}
+	/*
+	* The main match is determined by the weight[0] value
+	* of the character (or characters, if the input can be
+	* taken as a multiple character collating element).
+	*/
+	mcbuf.max = 0;
+	mcbuf.bp = bp;
+	mcbuf.col = bp->col;
+	mcbuf.cep = ELEM_BADCHAR;
+	mcce(&mcbuf, ELEM_BADCHAR, str, mb_cur_max, 0);
+	if (mcbuf.cep == ELEM_BADCHAR)
+		return -1;	/* never matches */
+	if (mcbuf.cep != ELEM_ENCODED)
+		mcbuf.wc = mcbuf.cep->weight[0];
+	/*
+	* POSIX.2 demands that both a character and its case counterpart
+	* can match if REG_ICASE is set. This means that [B-z] matches
+	* 'A', 'a', and '['.
+	*/
+	if (bp->flags & BKT_ONECASE)
+	{
+		lc = to_lower(mcbuf.wc);
+		uc = to_upper(mcbuf.wc);
+	}
+	else
+		lc = uc = mcbuf.wc;
+	/*
+	* See if it's in the set.  Note that the list of true wide
+	* character values has explicit ranges.
+	*/
+	if (mcbuf.wc <= UCHAR_MAX)	/* NOTE(review): lc/uc themselves are not range-checked before indexing byte[] - confirm */
+	{
+		if (bp->byte[PLIND(lc)] & PLBIT(lc))
+			goto match;
+		if (lc != uc && (bp->byte[PLIND(uc)] & PLBIT(uc)))
+			goto match;
+	}
+	else if ((i = bp->nwide) != 0)
+	{
+		wchar_t *wcp = &bp->wide[0];
+		long lcmp, ucmp;
+
+		for (;;)
+		{
+			if ((lcmp = lc - *wcp) == 0)
+				goto match;
+			ucmp = uc - *wcp;
+			if (lc != uc && ucmp == 0)
+				goto match;
+			if (--i == 0)
+				break;
+			if (++wcp == &bp->wide[NWIDE])
+				wcp = &bp->exwide[0];
+			if (*wcp == RANGE)
+			{
+				if (++wcp == &bp->wide[NWIDE])
+					wcp = &bp->exwide[0];
+				if (lcmp > 0 && lc <= *wcp)
+					goto match;
+				if (lc != uc && ucmp > 0 && uc <= *wcp)	/* upper end is inclusive, as for lc */
+					goto match;
+				if ((i -= 2) == 0)
+					break;
+				if (++wcp == &bp->wide[NWIDE])
+					wcp = &bp->exwide[0];
+			}
+		}
+	}
+	/*
+	* The last chance for a match is if an equivalence class
+	* was specified for which the primary weights are scattered
+	* through the weight[0]s.
+	*/
+	if ((i = bp->nquiv) != 0 && mcbuf.cep != ELEM_ENCODED)
+	{
+		wuchar_type *wucp = &bp->quiv[0];
+
+		mcbuf.wc = mcbuf.cep->weight[1];
+		for (;;)
+		{
+			if (mcbuf.wc == *wucp)
+				goto match;
+			if (--i == 0)
+				break;
+			if (++wucp == &bp->quiv[NQUIV])
+				wucp = &bp->exquiv[0];
+		}
+	}
+	/*
+	* Only here when no match against the set was found.
+	* One final special case w/r/t newline.
+	*/
+	if (bp->flags & BKT_NEGATED)
+	{
+		if (wc != '\n' || (bp->flags & BKT_NOTNL) == 0)
+			return mcbuf.aft - str;
+	}
+	return -1;
+match:;
+	/*
+	* Only here when a match against the described set is found.
+	*/
+	if (bp->flags & BKT_NEGATED)
+		return -1;
+	return mcbuf.aft - str;
+}
diff --git a/libuxre/colldata.h b/libuxre/colldata.h
new file mode 100644
index 0000000..e3a3784
--- /dev/null
+++ b/libuxre/colldata.h
@@ -0,0 +1,226 @@
+/*
+ * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
+ *
+ * Sccsid @(#)colldata.h	1.5 (gritter) 5/1/04
+ */
+/*	UNIX(R) Regular Expression Library
+ *
+ *	Note: Code is released under the GNU LGPL
+ *
+ *	Copyright (C) 2001 Caldera International, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to:
+ *   Free Software Foundation, Inc.
+ *   59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef	LIBUXRE_COLLDATA_H
+#define	LIBUXRE_COLLDATA_H
+
+typedef struct
+{
+	long	coll_offst;	/* offset to xnd table */
+	long	sub_cnt;	/* length of subnd table */
+	long	sub_offst;	/* offset to subnd table */
+	long	str_offst;	/* offset to strings for subnd table */
+	long	flags;		/* nonzero if reg.exp.
used */ +} hd; + +typedef struct +{ + unsigned char ch; /* character or number of followers */ + unsigned char pwt; /* primary weight */ + unsigned char swt; /* secondary weight */ + unsigned char ns; /* index of follower state list */ +} xnd; + +typedef struct +{ + char *exp; /* expression to be replaced */ + long explen; /* length of expression */ + char *repl; /* replacement string */ +} subnd; + +/*----------------------------------*/ + +#include +#include +/* #include */ + +/* +* Structure of a collation file: +* 1. CollHead (maintbl is 0 if CHF_ENCODED) +* if !CHF_ENCODED then +* 2. CollElem[bytes] (256 for 8 bit bytes) +* 3. if CHF_INDEXED then +* CollElem[wides] (nmain-256 for 8 bit bytes) +* else +* CollMult[wides] +* 4. CollMult[*] (none if multtbl is 0) +* 5. wuchar_type[*] (none if repltbl is 0) +* 6. CollSubn[*] (none if subntbl is 0) +* 7. strings (first is pathname for .so if CHF_DYNAMIC) +* +* The actual location of parts 2 through 7 is not important. +* +* The main table is in encoded value order. +* +* All indeces/offsets must be nonzero to be effective; zero is reserved +* to indicate no-such-entry. This implies either that an unused initial +* entry is placed in each of (4) through (7), or that the "start offset" +* given by the header is artificially pushed back by an entry size. +* +* Note that if CHF_ENCODED is not set, then nweight must be positive. +* +* If an element can begin a multiple character element, it contains a +* nonzero multbeg which is the initial index into (4) for its list; +* the list is terminated by a CollMult with a ch of zero. +* +* If there are elements with the same primary weight (weight[1]), then +* for each such element, it must have a CollMult list. The CollMult +* that terminates the list (ch==0) notes the lowest and highest basic +* weights for those elements with that same primary weight value +* respectively in weight[0] and weight[1]. 
If there are some basic +* weights between these values that do not have the same primary +* weight--are not in the equivalence class--then the terminator also +* has a SUBN_SPECIAL mark. Note that this list terminator should be +* shared when the elements are not multiple character collating +* elements because they wouldn't otherwise have a CollMult list. +* +* WGHT_IGNORE is used to denote ignored collating elements for a +* particular collation ordering pass. All main table entries other +* than for '\0' will have a non-WGHT_IGNORE weight[0]. However, it is +* possible for a CollMult entries from (4) to have a WGHT_IGNORE +* weight[0]: If, for example, "xyz" is a multiple character collating +* element, but "xy" is not, then the CollMult for "y" will have a +* WGHT_IGNORE weight[0]. Also, WGHT_IGNORE is used to terminate each +* list of replacement weights. +* +* Within (3), it is possible to describe a sequence of unremarkable +* collating elements with a single CollMult entry. If the SUBN_SPECIAL +* bit is set, the rest of subnbeg represents the number of collating +* elements covered by this entry. The weight[0] values are determined +* by adding the difference between the encoded value and the entry's ch +* value to the entry's weight[0]. This value is then substituted for +* any weight[n], n>0 that has only the WGHT_SPECIAL bit set. libuxre_collelem() +* hides any match to such an entry by filling in a "spare" CollElem. +* +* If there are substitution strings, then for each character that begins +* a string, it has a nonzero subnbeg which is similarly the initial +* index into (6). The indeces in (6) refer to offsets within (7). +*/ + +#define TOPBIT(t) (((t)1) << (sizeof(t) * CHAR_BIT - 1)) + +#define CHF_ENCODED 0x1 /* collation by encoded values only */ +#define CHF_INDEXED 0x2 /* main table indexed by encoded values */ +#define CHF_MULTICH 0x4 /* a multiple char. coll. elem. 
exists */ +#define CHF_DYNAMIC 0x8 /* shared object has collation functions */ + +#define CWF_BACKWARD 0x1 /* reversed ordering for this weight */ +#define CWF_POSITION 0x2 /* weight takes position into account */ + +#define CLVERS 1 /* most recent version */ + +#define WGHT_IGNORE 0 /* ignore this collating element */ +#define WGHT_SPECIAL TOPBIT(wuchar_type) +#define SUBN_SPECIAL TOPBIT(unsigned short) + +#ifndef COLL_WEIGHTS_MAX +#define COLL_WEIGHTS_MAX 1 +#endif + +typedef struct +{ + unsigned long maintbl; /* start of main table */ + unsigned long multtbl; /* start of multi-char table */ + unsigned long repltbl; /* start of replacement weights */ + unsigned long subntbl; /* start of substitutions */ + unsigned long strstbl; /* start of sub. strings */ + unsigned long nmain; /* # entries in main table */ + unsigned short flags; /* CHF_* bits */ + unsigned short version; /* handle future changes */ + unsigned char elemsize; /* # bytes/element (w/padding) */ + unsigned char nweight; /* # weights/element */ + unsigned char order[COLL_WEIGHTS_MAX]; /* CWF_* bits/weight */ +} CollHead; + +typedef struct +{ + unsigned short multbeg; /* start of multi-chars */ + unsigned short subnbeg; /* start of substitutions */ + wuchar_type weight[COLL_WEIGHTS_MAX]; +} CollElem; + +typedef struct +{ + wchar_t ch; /* "this" character (of sequence) */ + CollElem elem; /* its full information */ +} CollMult; + +typedef struct +{ + unsigned short strbeg; /* start of match string */ + unsigned short length; /* length of match string */ + unsigned short repbeg; /* start of replacement */ +} CollSubn; + +struct lc_collate +{ + const unsigned char *strstbl; + const wuchar_type *repltbl; + const CollElem *maintbl; + const CollMult *multtbl; + const CollSubn *subntbl; +#ifdef DSHLIB + void *handle; + void (*done)(struct lc_collate *); + int (*strc)(struct lc_collate *, const char *, const char *); + int (*wcsc)(struct lc_collate *, const wchar_t *, const wchar_t *); + size_t (*strx)(struct 
lc_collate *, char *, const char *, size_t); + size_t (*wcsx)(struct lc_collate *, wchar_t *, const wchar_t *, size_t); +#endif + const char *mapobj; + size_t mapsize; + unsigned long nmain; + short nuse; + unsigned short flags; + unsigned char elemsize; + unsigned char nweight; + unsigned char order[COLL_WEIGHTS_MAX]; +}; + +#define ELEM_BADCHAR ((CollElem *)0) +#define ELEM_ENCODED ((CollElem *)-1) + +/* +LIBUXRE_STATIC int libuxre_old_collate(struct lc_collate *); +LIBUXRE_STATIC int libuxre_strqcoll(struct lc_collate *, const char *, + const char *); +LIBUXRE_STATIC int libuxre_wcsqcoll(struct lc_collate *, const wchar_t *, + const wchar_t *); +*/ +extern struct lc_collate *libuxre_lc_collate(struct lc_collate *); +LIBUXRE_STATIC const CollElem *libuxre_collelem(struct lc_collate *, + CollElem *, wchar_t); +LIBUXRE_STATIC const CollElem *libuxre_collmult(struct lc_collate *, + const CollElem *, wchar_t); +/* +LIBUXRE_STATIC const CollElem *libuxre_collmbs(struct lc_collate *, + CollElem *, const unsigned char **); +LIBUXRE_STATIC const CollElem *libuxre_collwcs(struct lc_collate *, + CollElem *, const wchar_t **); +*/ + +#endif /* !LIBUXRE_COLLDATA_H */ diff --git a/libuxre/onefile.c b/libuxre/onefile.c new file mode 100644 index 0000000..78f22a0 --- /dev/null +++ b/libuxre/onefile.c @@ -0,0 +1,38 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)onefile.c 1.1 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define LIBUXRE_STATIC static + +#include "_collelem.c" +#include "_collmult.c" +#include "stubs.c" +#include "bracket.c" +#include "regdfa.c" +#include "regnfa.c" +#include "regparse.c" +#include "regcomp.c" +#include "regexec.c" diff --git a/libuxre/re.h b/libuxre/re.h new file mode 100644 index 0000000..2738a05 --- /dev/null +++ b/libuxre/re.h @@ -0,0 +1,228 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)re.h 1.15 (gritter) 2/6/05 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. 
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef LIBUXRE_RE_H +#define LIBUXRE_RE_H + + /* + * Maps safe external tag to internal one + */ +#define re_coll_ lc_collate /* */ +/* #define __fnm_collate lc_collate */ /* */ + +#include +#include +/* #include */ +#include + +#define NBSHT (sizeof(unsigned short) * CHAR_BIT) +#define NBYTE (((1 << CHAR_BIT) + NBSHT - 1) / NBSHT) +#define NTYPE 4 +#define NWIDE 32 +#define NQUIV 4 + +typedef struct +{ + struct lc_collate *col; /* only member set by caller */ + wctype_t *extype; + wuchar_type *exquiv; + wchar_t *exwide; + wctype_t type[NTYPE]; + wuchar_type quiv[NQUIV]; + wchar_t wide[NWIDE]; + unsigned short byte[NBYTE]; + unsigned short ntype; + unsigned short nquiv; + unsigned short nwide; + unsigned int flags; +} Bracket; + +#define BKT_NEGATED 0x001 /* complemented set */ +#define BKT_ONECASE 0x002 /* uppercase same as lowercase */ +#define BKT_NOTNL 0x004 /* do not match newline when BKT_NEGATED */ +#define BKT_BADRANGE 0x008 /* accept [m-a] ranges as [ma] */ +#define BKT_SEPRANGE 0x010 /* disallow [a-m-z] style ranges */ +#define BKT_NLBAD 0x020 /* newline disallowed */ +#define BKT_SLASHBAD 0x040 /* slash disallowed (for pathnames) */ +#define BKT_EMPTY 0x080 /* take leading ] is end (empty set) */ +#define BKT_ESCAPE 0x100 /* allow \ as quote for next anything */ +#define BKT_QUOTE 0x200 /* allow \ as quote for \\, \^, \- or \] */ +#define BKT_ESCNL 0x400 /* take \n as the newline character */ +#define BKT_ESCSEQ 0x800 /* otherwise, take \ as in C escapes */ +#define BKT_ODDRANGE 0x1000 /* oawk oddity: [m-a] means [m] */ +#define BKT_NOI18N 0x2000 /* disable [::] [==] [..] */ +#define BKT_OLDESC 0x4000 /* enable \b \f \n \r \t only */ + + /* + * These error returns for libuxre_bktmbcomp() are directly tied to + * the error returns for regcomp() for convenience. 
+ */ +#define BKT_BADPAT (-REG_BADPAT) +#define BKT_ECOLLATE (-REG_ECOLLATE) +#define BKT_ECTYPE (-REG_ECTYPE) +#define BKT_EEQUIV (-REG_EEQUIV) +#define BKT_BADCHAR (-REG_EBKTCHAR) +#define BKT_EBRACK (-REG_EBRACK) +#define BKT_EMPTYSUBBKT (-REG_EMPTYSUBBKT) +#define BKT_ERANGE (-REG_ERANGE) +#define BKT_ESPACE (-REG_ESPACE) +#define BKT_BADESC (-REG_BADESC) +#define BKT_ILLSEQ (-REG_ILLSEQ) + + /* + * These must be distinct from the flags in . + */ +#define FNM_COLLATE 0x2000 /* have collation information */ +#define FNM_CURRENT 0x4000 /* have full-sized fnm_t structure */ + + /* + * These must be distinct from the flags in . + */ +#define REG_NFA 0x20000000 +#define REG_DFA 0x40000000 +#define REG_GOTBKT 0x80000000 + +#define BRACE_INF USHRT_MAX +#define BRACE_MAX 5100 /* arbitrary number < SHRT_MAX */ +#define BRACE_DFAMAX 255 /* max amount for r.e. duplication */ + +typedef union /* extra info always kept for some tokens/nodes */ +{ + Bracket *bkt; /* ROP_BKT */ + size_t sub; /* ROP_LP (ROP_RP), ROP_REF */ + unsigned short num[2]; /* ROP_BRACE: num[0]=low, num[1]=high */ +} Info; + +typedef struct /* lexical context while parsing */ +{ + Info info; + const unsigned char *pat; + unsigned char *clist; + struct lc_collate *col; + unsigned long flags; + w_type tok; + size_t maxref; + size_t nleft; + size_t nright; + size_t nclist; + int bktflags; + int err; + int mb_cur_max; +} Lex; + +typedef struct t_tree Tree; /* RE parse tree node */ +struct t_tree +{ + union + { + Tree *ptr; /* unary & binary nodes */ + size_t pos; /* position for DFA leaves */ + } left; + union + { + Tree *ptr; /* binary nodes */ + Info info; + } right; + Tree *parent; + w_type op; /* positive => char. 
to match */ +}; + +typedef struct re_dfa_ Dfa; /* DFA engine description */ +typedef struct re_nfa_ Nfa; /* NFA engine description */ + +typedef struct +{ + const unsigned char *str; + regmatch_t *match; + size_t nmatch; + unsigned long flags; + int mb_cur_max; +} Exec; + + /* + * Regular expression operators. Some only used internally. + * All are negative, to distinguish them from the regular + * "match this particular wide character" operation. + */ +#define BINARY_ROP 0x02 +#define UNARY_ROP 0x01 +#define LEAF_ROP 0x00 + +#define MAKE_ROP(k, v) (-((v) | ((k) << 4))) +#define KIND_ROP(v) ((-(v)) >> 4) + +#define ROP_OR MAKE_ROP(BINARY_ROP, 1) +#define ROP_CAT MAKE_ROP(BINARY_ROP, 2) + +#define ROP_STAR MAKE_ROP(UNARY_ROP, 1) +#define ROP_PLUS MAKE_ROP(UNARY_ROP, 2) +#define ROP_QUEST MAKE_ROP(UNARY_ROP, 3) +#define ROP_BRACE MAKE_ROP(UNARY_ROP, 4) +#define ROP_LP MAKE_ROP(UNARY_ROP, 5) +#define ROP_RP MAKE_ROP(UNARY_ROP, 6) + +#define ROP_NOP MAKE_ROP(LEAF_ROP, 1) /* temporary */ +#define ROP_BOL MAKE_ROP(LEAF_ROP, 2) /* ^ anchor */ +#define ROP_EOL MAKE_ROP(LEAF_ROP, 3) /* $ anchor */ +#define ROP_ALL MAKE_ROP(LEAF_ROP, 4) /* anything (added) */ +#define ROP_ANYCH MAKE_ROP(LEAF_ROP, 5) /* . w/\n */ +#define ROP_NOTNL MAKE_ROP(LEAF_ROP, 6) /* . w/out \n */ +#define ROP_EMPTY MAKE_ROP(LEAF_ROP, 7) /* empty string */ +#define ROP_NONE MAKE_ROP(LEAF_ROP, 8) /* match failure */ +#define ROP_BKT MAKE_ROP(LEAF_ROP, 9) /* [...] */ +#define ROP_BKTCOPY MAKE_ROP(LEAF_ROP, 10) /* [...] 
(duplicated) */ +#define ROP_LT MAKE_ROP(LEAF_ROP, 11) /* \< word begin */ +#define ROP_GT MAKE_ROP(LEAF_ROP, 12) /* \> word end */ +#define ROP_REF MAKE_ROP(LEAF_ROP, 13) /* \digit */ +#define ROP_END MAKE_ROP(LEAF_ROP, 14) /* final (added) */ + + /* + * Return values: + * libuxre_bktmbcomp() + * <0 error (see BKT_* above); >0 #bytes scanned + * libuxre_bktmbexec() + * <0 doesn't match; >=0 matches, #extra bytes scanned + */ +LIBUXRE_STATIC void libuxre_bktfree(Bracket *); +LIBUXRE_STATIC int libuxre_bktmbcomp(Bracket *, const unsigned char *, + int, int); +LIBUXRE_STATIC int libuxre_bktmbexec(Bracket *, wchar_t, + const unsigned char *, int); + +LIBUXRE_STATIC void libuxre_regdeltree(Tree *, int); +LIBUXRE_STATIC Tree *libuxre_reg1tree(w_type, Tree *); +LIBUXRE_STATIC Tree *libuxre_reg2tree(w_type, Tree *, Tree *); +LIBUXRE_STATIC Tree *libuxre_regparse(Lex *, const unsigned char *, int); + +extern void libuxre_regdeldfa(Dfa *); +LIBUXRE_STATIC int libuxre_regdfacomp(regex_t *, Tree *, Lex *); +LIBUXRE_STATIC int libuxre_regdfaexec(Dfa *, Exec *); + +extern void libuxre_regdelnfa(Nfa *); +LIBUXRE_STATIC int libuxre_regnfacomp(regex_t *, Tree *, Lex *); +LIBUXRE_STATIC int libuxre_regnfaexec(Nfa *, Exec *); +#endif /* !LIBUXRE_RE_H */ diff --git a/libuxre/regcomp.c b/libuxre/regcomp.c new file mode 100644 index 0000000..20a197d --- /dev/null +++ b/libuxre/regcomp.c @@ -0,0 +1,77 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regcomp.c 1.6 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include "re.h" + +/* #pragma weak regcomp = _regcomp */ + +int +regcomp(regex_t *ep, const char *pat, int flags) +{ + Tree *tp; + Lex lex; + + if ((tp=libuxre_regparse(&lex, (const unsigned char *)pat, flags)) == 0) + goto out; + ep->re_nsub = lex.nleft; + ep->re_flags = lex.flags & ~(REG_NOTBOL | REG_NOTEOL | REG_NONEMPTY); + ep->re_col = lex.col; + ep->re_mb_cur_max = lex.mb_cur_max; + /* + * Build the engine(s). The factors determining which are built: + * 1. If the pattern built insists on an NFA, then only build NFA. + * 2. If flags include REG_NOSUB or REG_ONESUB and not (1), + * then only build DFA. + * 3. Otherwise, build both. + * Since libuxre_regdfacomp() modifies the tree and libuxre_regnfacomp() + * doesn't, libuxre_regnfacomp() must be called first, if both are to + * be called. 
+ */ + if (ep->re_nsub != 0 && (flags & (REG_NOSUB | REG_ONESUB)) == 0 + || lex.flags & REG_NFA) + { + ep->re_flags |= REG_NFA; + if ((lex.err = libuxre_regnfacomp(ep, tp, &lex)) != 0) + goto out; + } + if ((lex.flags & REG_NFA) == 0) + { + ep->re_flags |= REG_DFA; + if ((lex.err = libuxre_regdfacomp(ep, tp, &lex)) != 0) + { + if (ep->re_flags & REG_NFA) + libuxre_regdelnfa(ep->re_nfa); + } + } +out:; + if (lex.err != 0 && lex.col != 0) + (void)libuxre_lc_collate(lex.col); + if (tp != 0) + libuxre_regdeltree(tp, lex.err); + return lex.err; +} diff --git a/libuxre/regdfa.c b/libuxre/regdfa.c new file mode 100644 index 0000000..8142e8d --- /dev/null +++ b/libuxre/regdfa.c @@ -0,0 +1,877 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regdfa.c 1.9 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include +#include +#include +#include "regdfa.h" + +/* +* Deterministic Finite Automata. 
+*/
+
+	/*
+	 * Postorder traversal that returns a copy of the subtree,
+	 * except that ROP_BKT becomes ROP_BKTCOPY (since they
+	 * share the same pointed to Bracket object).
+	 *
+	 * Every copied leaf other than ROP_EMPTY receives a fresh
+	 * position number from ep->re_dfa->nposn.  The parent link
+	 * of the node returned is not set here.  Returns 0 if an
+	 * allocation fails.
+	 */
+static Tree *
+copy(regex_t *ep, Tree *tp)
+{
+	Tree *np;
+
+	if ((np = malloc(sizeof(Tree))) == 0)
+		return 0;
+	switch (np->op = tp->op) /* almost always correct */
+	{
+	case ROP_BKT:
+		np->op = ROP_BKTCOPY; /* the original keeps ROP_BKT */
+		/*FALLTHROUGH*/
+	case ROP_BKTCOPY:
+		np->right.info.bkt = tp->right.info.bkt; /* shared, not duplicated */
+		/*FALLTHROUGH*/
+	default: /* any other leaf */
+		np->left.pos = ep->re_dfa->nposn++;
+		/*FALLTHROUGH*/
+	case ROP_EMPTY: /* a leaf that takes no position */
+		return np;
+	case ROP_CAT:
+	case ROP_OR:
+		if ((np->right.ptr = copy(ep, tp->right.ptr)) == 0)
+		{
+			free(np); /* nothing hangs below np yet */
+			return 0;
+		}
+		np->right.ptr->parent = np;
+		/*FALLTHROUGH*/
+	case ROP_STAR:
+	case ROP_PLUS:
+	case ROP_QUEST:
+	case ROP_LP:
+		if ((np->left.ptr = copy(ep, tp->left.ptr)) == 0)
+			break; /* clean up the partial copy below */
+		np->left.ptr->parent = np;
+		return np;
+	}
+	libuxre_regdeltree(np, 1); /* only reached after a failed left copy */
+	return 0;
+}
+
+	/*
+	 * Postorder traversal.
+	 * Assign unique ascending integer values to the leaves.
+	 * Since the right child is traversed before the left,
+	 * the position for ROP_END is guaranteed to be zero.
+	 * The parse tree is rewritten in two cases:
+	 * - Each ROP_BRACE is replaced by an equivalent--sometimes
+	 *   large--subtree using only ROP_CAT, ROP_QUEST, and
+	 *   ROP_PLUS.
+	 * - If REG_ICASE, replace each simple character that has
+	 *   an uppercase equivalent with a ROP_OR subtree over the
+	 *   two versions.
+	 * Since these rewrites occur bottom up, they have already
+	 * been applied before any subtrees passed to copy().
+	 */
+static Tree *
+findposn(regex_t *ep, Tree *tp, int mb_cur_max)
+{	/*
+	 * Returns the root of the (possibly rewritten) subtree, or 0
+	 * when an allocation fails.  mb_cur_max is only passed on to
+	 * the recursive calls.
+	 */
+	unsigned int lo, hi;
+	Tree *ptr, *par;
+	w_type wc;
+
+	switch (tp->op)
+	{
+	default: /* an ordinary character to match */
+		if (ep->re_flags & REG_ICASE
+			&& (wc = to_upper(tp->op)) != tp->op)
+		{
+			/* left child: the character as written */
+			if ((ptr = libuxre_reg1tree(tp->op, 0)) == 0)
+				return 0;
+			ptr->parent = tp;
+			ptr->left.pos = ep->re_dfa->nposn++;
+			tp->op = ROP_OR; /* this node becomes the alternation */
+			tp->left.ptr = ptr;
+			/* right child: its uppercase form */
+			ptr = libuxre_reg1tree(wc, 0);
+			if ((tp->right.ptr = ptr) == 0)
+				return 0;
+			ptr->parent = tp;
+			ptr->left.pos = ep->re_dfa->nposn++;
+			return tp;
+		}
+		/*FALLTHROUGH*/
+	case ROP_BOL:
+	case ROP_EOL:
+	case ROP_ALL:
+	case ROP_ANYCH:
+	case ROP_NOTNL:
+	case ROP_NONE:
+	case ROP_BKT:
+	case ROP_BKTCOPY:
+	case ROP_END:
+		tp->left.pos = ep->re_dfa->nposn++;
+		return tp;
+	case ROP_EMPTY: /* takes no position */
+		return tp;
+	case ROP_OR:
+	case ROP_CAT:
+		if ((tp->right.ptr = findposn(ep, tp->right.ptr,
+			mb_cur_max)) == 0)
+			return 0;
+		/*FALLTHROUGH*/
+	case ROP_STAR:
+	case ROP_PLUS:
+	case ROP_QUEST:
+	case ROP_LP:
+		if ((tp->left.ptr = findposn(ep, tp->left.ptr,
+			mb_cur_max)) == 0)
+			return 0;
+		return tp;
+	case ROP_BRACE:
+		if ((tp->left.ptr = findposn(ep, tp->left.ptr,
+			mb_cur_max)) == 0)
+			return 0;
+		break; /* expanded by the code after the switch */
+	}
+	/*
+	 * ROP_BRACE as is cannot be handled in a DFA. This code
+	 * duplicates the ROP_BRACE subtree as a left-towering
+	 * series of ROP_CAT nodes, the first "lo" of which are
+	 * direct copies of the original subtree. The tail of
+	 * the series are either some number of ROP_QUESTs over
+	 * copies of the original subtree, or a single ROP_PLUS
+	 * over a copy (when "hi" is infinity).
+	 *
+	 * All interesting cases {lo,hi}:
+	 *	{0,0} -> ROP_EMPTY, parsing, temporary
+	 *	{0,1} -> ROP_QUEST, parsing
+	 *	{0,2} -> CAT(QUEST(left), QUEST(copy))
+	 *	{0,n} -> CAT({0,n-1}, QUEST(copy))
+	 *	{0,} -> ROP_STAR, parsing
+	 *
+	 *	{1,1} -> ROP_NOP, parsing, temporary
+	 *	{1,2} -> CAT(left, QUEST(copy))
+	 *	{1,n} -> CAT({1,n-1}, QUEST(copy))
+	 *	{1,} -> ROP_PLUS, parsing
+	 *
+	 *	{2,2} -> CAT(left, copy)
+	 *	{2,n} -> CAT({2,n-1}, QUEST(copy))
+	 *	{2,} -> CAT(left, PLUS(copy))
+	 *
+	 *	{3,3} -> CAT({2,2}, copy)
+	 *	{3,n} -> CAT({3,n-1}, QUEST(copy))
+	 *	{3,} -> CAT({2,2}, PLUS(copy))
+	 *
+	 *	{n,} -> CAT({n-1,n-1}, PLUS(copy))
+	 *
+	 * In all cases, the ROP_BRACE node is turned into the
+	 * left-most ROP_CAT, and a copy of its original subtree
+	 * is connected as the right child. Note that the bottom-
+	 * up nature of this duplication guarantees that copy()
+	 * never sees a ROP_BRACE node.
+	 */
+	par = tp->parent; /* put back on the final root at the end */
+	lo = tp->right.info.num[0];
+	hi = tp->right.info.num[1];
+	if ((ptr = copy(ep, tp->left.ptr)) == 0)
+		return 0;
+	ptr->parent = tp;
+	tp->op = ROP_CAT;
+	tp->right.ptr = ptr;
+	if (lo == 0)
+	{
+		if ((tp->left.ptr = libuxre_reg1tree(ROP_QUEST, tp->left.ptr))
+			== 0)
+			return 0;
+		tp->left.ptr->parent = tp;
+	}
+	else
+	{
+		/* from here on a finite hi counts the optional repeats */
+		if (hi == BRACE_INF || (hi -= lo) == 0)
+			lo--; /* lo > 1; no extra needed */
+		while (--lo != 0)
+		{
+			/* NOTE(review): copy() can return 0 here; whether libuxre_reg2tree() accepts a null child is not visible in this file -- confirm */
+			if ((tp = libuxre_reg2tree(ROP_CAT, tp, copy(ep, ptr)))
+				== 0)
+				return 0;
+		}
+	}
+	if (hi == BRACE_INF)
+	{
+		if ((tp->right.ptr = libuxre_reg1tree(ROP_PLUS, tp->right.ptr))
+			== 0)
+			return 0;
+		tp->right.ptr->parent = tp;
+	}
+	else if (hi != 0)
+	{
+		if ((tp->right.ptr = libuxre_reg1tree(ROP_QUEST, tp->right.ptr))
+			== 0)
+			return 0;
+		ptr = tp->right.ptr; /* later copies include the ROP_QUEST */
+		ptr->parent = tp;
+		while (--hi != 0)
+		{
+			if ((tp = libuxre_reg2tree(ROP_CAT, tp, copy(ep, ptr)))
+				== 0)
+				return 0;
+		}
+	}
+	tp->parent = par;
+	return tp;
+}
+
+	/*
+	 * Postorder traversal, but not always entire subtree.
+	 * For each leaf reachable by the empty string, add it
+	 * to the set. Return 0 if the subtree can match empty.
+	 */
+static int
+first(Dfa *dp, Tree *tp)
+{	/*
+	 * "The set" is dp->posset[], indexed by leaf position, with
+	 * dp->nset counting the positions marked so far.  Returns 1
+	 * when the subtree cannot match empty.
+	 */
+	switch (tp->op)
+	{
+	case ROP_BOL:
+		if (dp->flags & REG_NOTBOL)
+			return 0; /* anchor treated as matching empty */
+		break;
+	case ROP_EOL:
+		if (dp->flags & REG_NOTEOL)
+			return 0; /* anchor treated as matching empty */
+		break;
+	case ROP_EMPTY:
+		return 0;
+	case ROP_OR:
+		/* '&' does not short-circuit: both branches are always visited */
+		return first(dp, tp->left.ptr) & first(dp, tp->right.ptr);
+	case ROP_CAT:
+		if (first(dp, tp->left.ptr) != 0)
+			return 1;
+		return first(dp, tp->right.ptr); /* left can be empty: right counts too */
+	case ROP_BRACE:
+		if (tp->right.info.num[0] != 0 && first(dp, tp->left.ptr) != 0)
+			return 1;
+		/*FALLTHROUGH*/
+	case ROP_STAR:
+	case ROP_QUEST:
+		first(dp, tp->left.ptr); /* collect the leaves; result is ignored */
+		return 0;
+	case ROP_LP:
+	case ROP_PLUS:
+		return first(dp, tp->left.ptr);
+	}
+	/* a leaf that consumes input: mark its position once */
+	if (dp->posset[tp->left.pos] == 0)
+	{
+		dp->posset[tp->left.pos] = 1;
+		dp->nset++;
+	}
+	return 1;
+}
+
+	/*
+	 * Walk from leaf up (most likely not to root).
+	 * Determine follow set for the leaf by filling
+	 * set[] with the positions reachable.
+	 *
+	 * The positions are collected by first(), i.e. in
+	 * dp->posset[] and dp->nset.  The walk ends at the first
+	 * parent whose operator has no case below.
+	 */
+static void
+follow(Dfa *dp, Tree *tp)
+{
+	Tree *pp;
+
+	switch ((pp = tp->parent)->op)
+	{
+	case ROP_CAT:
+		/* coming from the left: what follows is the right operand */
+		if (pp->left.ptr == tp && first(dp, pp->right.ptr) != 0)
+			break; /* right operand cannot be empty: stop here */
+		/*FALLTHROUGH*/
+	case ROP_OR:
+	case ROP_QUEST:
+	case ROP_LP:
+		follow(dp, pp);
+		break;
+	case ROP_STAR:
+	case ROP_PLUS:
+	case ROP_BRACE:
+		first(dp, tp); /* a repetition can start over at its operand */
+		follow(dp, pp);
+		break;
+	}
+}
+
+	/*
+	 * Postorder traversal.
+	 * At each leaf, copy it into posn[] and assign its follow set.
+	 * Because the left-most subtree is ROP_ALL under ROP_STAR, the
+	 * follow set for its leaf (position dp->nposn-1) is the same
+	 * as the initial state's signature (prior to any ROP_BOL).
+	 */
+static int
+posnfoll(Dfa *dp, Tree *tp)
+{	/*
+	 * Returns 0 on success, or REG_ESPACE when the follow list
+	 * (dp->posfoll) cannot be enlarged.  Each leaf's follow set
+	 * is appended to dp->posfoll and recorded in its Posn entry
+	 * as a start index (seti) and a length (nset).
+	 */
+	unsigned char *s;
+	size_t i, n;
+	size_t *fp;
+	Posn *p;
+	int ret;
+
+	switch (tp->op)
+	{
+	case ROP_OR:
+	case ROP_CAT:
+		if ((ret = posnfoll(dp, tp->right.ptr)) != 0)
+			return ret;
+		/*FALLTHROUGH*/
+	case ROP_STAR:
+	case ROP_PLUS:
+	case ROP_QUEST:
+	case ROP_LP:
+		if ((ret = posnfoll(dp, tp->left.ptr)) != 0)
+			return ret;
+		return 0;
+	case ROP_END: /* keeps follow() from walking above the root */
+		p = &dp->posn[tp->left.pos];
+		p->op = tp->op;
+		p->seti = 0;
+		p->nset = 0;
+		return 0;
+	case ROP_BKT:
+	case ROP_BKTCOPY:
+		p = &dp->posn[tp->left.pos];
+		p->bkt = tp->right.info.bkt;
+		goto skip;
+	case ROP_BOL:
+		dp->flags |= REG_NOTBOL; /* adjacent ROP_BOLs match empty */
+		break;
+	case ROP_EOL:
+		dp->flags |= REG_NOTEOL; /* adjacent ROP_EOLs match empty */
+		break;
+	}
+	p = &dp->posn[tp->left.pos]; /* NOTE(review): a ROP_EMPTY leaf would get here although findposn() gives it no position; presumably none survive parsing -- confirm */
+skip:;
+	p->op = tp->op;
+	memset(dp->posset, 0, dp->nposn); /* start from an empty set */
+	dp->nset = 0;
+	follow(dp, tp);
+	dp->flags &= ~(REG_NOTBOL | REG_NOTEOL); /* undo the anchor marks set above */
+	fp = dp->posfoll;
+	if ((p->nset = dp->nset) > dp->avail) /* need more */
+	{
+		/* grow by twice the set size, but at least by nposn */
+		if ((n = p->nset << 1) < dp->nposn)
+			n = dp->nposn;
+		dp->avail += n;
+		if ((fp = realloc(dp->posfoll,
+			sizeof(size_t) * (dp->avail + dp->used))) == 0)
+		{
+			return REG_ESPACE;
+		}
+		dp->posfoll = fp;
+	}
+	p->seti = dp->used;
+	if ((i = dp->nset) != 0)
+	{
+		dp->used += i;
+		dp->avail -= i;
+		fp += p->seti;
+		s = dp->posset;
+		n = 0;
+		/* store the marked positions in ascending order */
+		do
+		{
+			if (*s++ != 0)
+			{
+				*fp++ = n;
+				if (--i == 0)
+					break;
+			}
+		} while (++n != dp->nposn);
+	}
+	return 0;
+}
+
+static int
+addstate(Dfa *dp) /* install state if unique; return its index */
+{
+	size_t *sp, *fp;
+	size_t t, n, i;
+	int flushed;
+
+	/*
+	 * Compare dp->nset/dp->cursig[] against remembered states.
+	 *
+	 * The value returned is the slot number plus one.  It is
+	 * negated when the cache had to be flushed to make room,
+	 * and it is 0 when the signature strip cannot be enlarged.
+	 */
+	t = dp->top;
+	do
+	{
+		if (dp->nsig[--t] != dp->nset)
+			continue;
+		if ((n = dp->nset) != 0)
+		{
+			fp = &dp->sigfoll[dp->sigi[t]];
+			sp = &dp->cursig[0];
+		loop:;
+			if (*fp++ != *sp++)
+				continue; /* to the do-while */
+			if (--n != 0)
+				goto loop;
+		}
+		return t + 1; /* identical signature already cached */
+	} while (t != 0);
+	/*
+	 * Not in currently cached states; add it.
+	 */
+	flushed = 0;
+	if ((t = dp->top) >= CACHESZ) /* need to flush the cache */
+	{
+		flushed = 1;
+		n = dp->anybol;
+		n = dp->sigi[n] + dp->nsig[n]; /* past invariant states */
+		dp->avail += dp->used - n;
+		dp->used = n;
+		dp->top = n = dp->nfix; /* only the first nfix slots are kept */
+		memset((void *)&dp->trans, 0, sizeof(dp->trans));
+		memset((void *)&dp->acc[n], 0, CACHESZ - n);
+		t = n;
+	}
+	dp->top++;
+	fp = dp->sigfoll;
+	if ((n = dp->nset) > dp->avail) /* grow strip */
+	{
+		i = dp->avail + n << 1; /* parses as (dp->avail + n) << 1 */
+		if ((fp = realloc(fp, sizeof(size_t) * (i + dp->used))) == 0)
+			return 0;
+		dp->avail = i;
+		dp->sigfoll = fp;
+	}
+	dp->acc[t] = 0;
+	if ((dp->nsig[t] = n) != 0)
+	{
+		sp = dp->cursig;
+		if (sp[0] == 0) /* signature starts with position 0 */
+			dp->acc[t] = 1;
+		dp->sigi[t] = i = dp->used;
+		dp->used += n;
+		dp->avail -= n;
+		fp += i;
+		do
+			*fp++ = *sp++;
+		while (--n != 0);
+	}
+	t++;
+	if (flushed)
+		return -t;
+	return t;
+}
+
+void
+libuxre_regdeldfa(Dfa *dp)
+{
+	Posn *pp;
+	size_t np;
+
+	if (dp->posfoll != 0)
+		free(dp->posfoll);
+	if (dp->sigfoll != 0)
+		free(dp->sigfoll);
+	if (dp->cursig != 0)
+		free(dp->cursig);
+	if ((pp = dp->posn) != 0)
+	{
+		/*
+		 * Need to walk the positions list to free any
+		 * space used for ROP_BKTs.
+ */ + np = dp->nposn; + do + { + if (pp->op == ROP_BKT) + { + libuxre_bktfree(pp->bkt); + free(pp->bkt); + } + } while (++pp, --np != 0); + free(dp->posn); + } + free(dp); +} + +int +regtrans(Dfa *dp, int st, w_type wc, int mb_cur_max) +{ + const unsigned char *s; + size_t *fp, *sp; + size_t i, n; + Posn *pp; + int nst; + + if ((n = dp->nsig[st]) == 0) /* dead state */ + return st + 1; /* stay here */ + memset(dp->posset, 0, dp->nposn); + dp->nset = 0; + fp = &dp->sigfoll[dp->sigi[st]]; + do + { + pp = &dp->posn[*fp]; + switch (pp->op) + { + case ROP_EOL: + if (wc == '\0' && (dp->flags & REG_NOTEOL) == 0) + break; + /*FALLTHROUGH*/ + case ROP_BOL: + default: + if (pp->op == wc) + break; + /*FALLTHROUGH*/ + case ROP_END: + case ROP_NONE: + continue; + case ROP_NOTNL: + if (wc == '\n') + continue; + /*FALLTHROUGH*/ + case ROP_ANYCH: + if (wc <= '\0') + continue; + break; + case ROP_ALL: + if (wc == '\0') + continue; + break; + case ROP_BKT: + case ROP_BKTCOPY: + /* + * Note that multiple character bracket matches + * are precluded from DFAs. (See regparse.c and + * regcomp.c.) Thus, the continuation string + * argument is not used in libuxre_bktmbexec(). + */ + if (wc > '\0' && + libuxre_bktmbexec(pp->bkt, wc, 0, mb_cur_max) == 0) + break; + continue; + } + /* + * Current character matches this position. + * For each position in its follow list, + * add that position to the new state's signature. + */ + i = pp->nset; + sp = &dp->posfoll[pp->seti]; + do + { + if (dp->posset[*sp] == 0) + { + dp->posset[*sp] = 1; + dp->nset++; + } + } while (++sp, --i != 0); + } while (++fp, --n != 0); + /* + * Move the signature (if any) into cursig[] and install it. 
+ */ + if ((i = dp->nset) != 0) + { + fp = dp->cursig; + s = dp->posset; + for (n = 0;; n++) + { + if (*s++ != 0) + { + *fp++ = n; + if (--i == 0) + break; + } + } + } + if ((nst = addstate(dp)) < 0) /* flushed cache */ + nst = -nst; + else if (nst > 0 && (wc & ~(long)(NCHAR - 1)) == 0) + dp->trans[st][wc] = nst; + return nst; +} + +LIBUXRE_STATIC int +libuxre_regdfacomp(regex_t *ep, Tree *tp, Lex *lxp) +{ + Tree *lp; + Dfa *dp; + Posn *p; + int st; + + /* + * It's convenient to insert an STAR(ALL) subtree to the + * immediate left of the current tree. This makes the + * "any match" libuxre_regdfaexec() not a special case, + * and the initial state signature will fall out when + * building the follow sets for all the leaves. + */ + if ((lp = libuxre_reg1tree(ROP_ALL, 0)) == 0 + || (lp = libuxre_reg1tree(ROP_STAR, lp)) == 0 + || (tp->left.ptr = lp + = libuxre_reg2tree(ROP_CAT, lp, tp->left.ptr)) == 0) + { + return REG_ESPACE; + } + lp->parent = tp; + if ((dp = calloc(1, sizeof(Dfa))) == 0) + return REG_ESPACE; + ep->re_dfa = dp; + /* + * Just in case null pointers aren't just all bits zero... + */ + dp->posfoll = 0; + dp->sigfoll = 0; + dp->cursig = 0; + dp->posn = 0; + /* + * Assign position values to each of the tree's leaves + * (the important parts), meanwhile potentially rewriting + * the parse tree so that it fits within the restrictions + * of our DFA. + */ + if ((tp = findposn(ep, tp, lxp->mb_cur_max)) == 0) + goto err; + /* + * Get space for the array of positions and current set, + * now that the number of positions is known. + */ + if ((dp->posn = malloc(sizeof(Posn) * dp->nposn + dp->nposn)) == 0) + goto err; + dp->posset = (unsigned char *)&dp->posn[dp->nposn]; + /* + * Get follow sets for each position. + */ + if (posnfoll(dp, tp) != 0) + goto err; + /* + * Set up the special invariant states: + * - dead state (no valid transitions); index 0. + * - initial state for any match [STAR(ALL) follow set]; index 1. 
+ * - initial state for any match after ROP_BOL. + * - initial state for left-most longest if REG_NOTBOL. + * - initial state for left-most longest after ROP_BOL. + * The final two are not allocated if leftmost() cannot be called. + * The pairs of initial states are the same if there is no + * explicit ROP_BOL transition. + */ + dp->avail += dp->used; + dp->used = 0; + if ((dp->sigfoll = malloc(sizeof(size_t) * dp->avail)) == 0) + goto err; + p = &dp->posn[dp->nposn - 1]; /* same as first(root) */ + dp->cursig = &dp->posfoll[p->seti]; + dp->nset = p->nset; + dp->top = 1; /* index 0 is dead state */ + addstate(dp); /* must be state index 1 (returns 2) */ + if ((dp->cursig = malloc(sizeof(size_t) * dp->nposn)) == 0) + goto err; + dp->nfix = 2; + if ((st = regtrans(dp, 1, ROP_BOL, lxp->mb_cur_max)) == 0) + goto err; + if ((dp->anybol = st - 1) == 2) /* new state */ + dp->nfix = 3; + if ((ep->re_flags & REG_NOSUB) == 0) /* leftmost() might be called */ + { + /* + * leftmost() initial states are the same as the + * "any match" ones without the STAR(ALL) position. 
+ */ + dp->sigi[dp->nfix] = 0; + dp->nsig[dp->nfix] = dp->nsig[1] - 1; + dp->acc[dp->nfix] = dp->acc[1]; + dp->leftbol = dp->leftmost = dp->nfix; + dp->nfix++; + if (dp->anybol != 1) /* distinct state w/BOL */ + { + dp->sigi[dp->nfix] = dp->sigi[2]; + dp->nsig[dp->nfix] = dp->nsig[2] - 1; + dp->acc[dp->nfix] = dp->acc[2]; + dp->leftbol = dp->nfix; + dp->nfix++; + } + dp->top = dp->nfix; + } + return 0; +err:; + libuxre_regdeldfa(dp); + return REG_ESPACE; +} + +static int +leftmost(Dfa *dp, Exec *xp) +{ + const unsigned char *s, *beg, *end; + int i, nst, st, mb_cur_max; + w_type wc; + + mb_cur_max = xp->mb_cur_max; + beg = s = xp->str; + end = 0; + st = dp->leftbol; + if (xp->flags & REG_NOTBOL) + st = dp->leftmost; + if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0) + end = s; /* initial empty match allowed */ + for (;;) + { + if ((wc = *s++) == '\n') + { + if (xp->flags & REG_NEWLINE) + wc = ROP_EOL; + } + else if (!ISONEBYTE(wc) && (i = libuxre_mb2wc(&wc, s)) > 0) + s += i; + if ((wc & ~(long)(NCHAR - 1)) != 0 + || (nst = dp->trans[st][wc]) == 0) + { + if ((nst=regtrans(dp, st, wc, mb_cur_max)) == 0) + return REG_ESPACE; + if (wc == ROP_EOL) /* REG_NEWLINE only */ + { + if (dp->acc[nst - 1]) + { + if (end == 0 || end < s) + end = s; + break; + } + beg = s; + st = dp->leftbol; + goto newst; + } + } + if ((st = nst - 1) == 0) /* dead state */ + { + if (end != 0) + break; + if ((wc = *beg++) == '\0') + return REG_NOMATCH; + else if (!ISONEBYTE(wc) && + (i = libuxre_mb2wc(&wc, beg)) > 0) + beg += i; + s = beg; + st = dp->leftmost; + goto newst; + } + if (wc == '\0') + { + if (dp->acc[st]) + { + s--; /* don't include \0 */ + if (end == 0 || end < s) + end = s; + break; + } + if (end != 0) + break; + return REG_NOMATCH; + } + newst:; + if (dp->acc[st]) + { + if (end == 0 || end < s) + end = s; + } + } + xp->match[0].rm_so = beg - xp->str; + xp->match[0].rm_eo = end - xp->str; + return 0; +} + +/* +* Optimization by simplification: singlebyte locale and REG_NEWLINE 
not set. +* Performance gain for grep is 25% so it's worth the hack. +*/ +static int +regdfaexec_opt(Dfa *dp, Exec *xp) +{ + const unsigned char *s; + int nst, st; + + s = xp->str; + st = dp->anybol; + if (xp->flags & REG_NOTBOL) + st = 1; + if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0) + return 0; /* initial empty match allowed */ + do + { + if ((nst = dp->trans[st][*s]) == 0) + { + if ((nst = regtrans(dp, st, *s, 1)) == 0) + return REG_ESPACE; + } + if (dp->acc[st = nst - 1]) + return 0; + } while (*s++ != '\0'); /* st != 0 */ + return REG_NOMATCH; +} + +LIBUXRE_STATIC int +libuxre_regdfaexec(Dfa *dp, Exec *xp) +{ + const unsigned char *s; + int i, nst, st, mb_cur_max; + w_type wc; + + dp->flags = xp->flags & REG_NOTEOL; /* for regtrans() */ + mb_cur_max = xp->mb_cur_max; + if (xp->nmatch != 0) + return leftmost(dp, xp); + if (mb_cur_max == 1 && (xp->flags & REG_NEWLINE) == 0) + return regdfaexec_opt(dp, xp); + s = xp->str; + st = dp->anybol; + if (xp->flags & REG_NOTBOL) + st = 1; + if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0) + return 0; /* initial empty match allowed */ + for (;;) + { + if ((wc = *s++) == '\n') + { + if (xp->flags & REG_NEWLINE) + wc = ROP_EOL; + } + else if (!ISONEBYTE(wc) && (i = libuxre_mb2wc(&wc, s)) > 0) + s += i; + if ((wc & ~(long)(NCHAR - 1)) != 0 + || (nst = dp->trans[st][wc]) == 0) + { + if ((nst=regtrans(dp, st, wc, mb_cur_max)) == 0) + return REG_ESPACE; + if (wc == ROP_EOL) /* REG_NEWLINE only */ + { + if (dp->acc[nst - 1]) + return 0; + if (dp->acc[st = dp->anybol]) + return 0; + continue; + } + } + if (dp->acc[st = nst - 1]) + return 0; + if (wc == '\0') /* st == 0 */ + return REG_NOMATCH; + } +} diff --git a/libuxre/regdfa.h b/libuxre/regdfa.h new file mode 100644 index 0000000..8cb0d48 --- /dev/null +++ b/libuxre/regdfa.h @@ -0,0 +1,75 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. 
+ * + * Sccsid @(#)regdfa.h 1.3 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ + +/* +* Deterministic Finite Automata. +*/ + +#ifndef LIBUXRE_REGDFA_H +#define LIBUXRE_REGDFA_H + +#include + +typedef struct +{ + Bracket *bkt; /* extra info for ROP_BKT */ + size_t nset; /* number of items in the follow set */ + size_t seti; /* index into the follow set strip */ + w_type op; /* the leaf match operation */ +} Posn; + +#define CACHESZ 32 /* max. 
states to remember (must fit in uchar) */ +#define NCHAR (1 << CHAR_BIT) + +struct re_dfa_ /*Dfa*/ +{ + unsigned char *posset; /* signatures built here */ + size_t *posfoll; /* follow strip for posn[] */ + size_t *sigfoll; /* follow strip for sigi[] */ + size_t *cursig; /* current state's signature */ + Posn *posn; /* important positions */ + size_t nposn; /* length of posn,cursig,posset */ + size_t used; /* used portion of follow strip */ + size_t avail; /* unused part of follow strip */ + size_t nset; /* # items nonzero in posset[] */ + size_t nsig[CACHESZ]; /* number of items in signature */ + size_t sigi[CACHESZ]; /* index into sigfoll[] */ + unsigned char acc[CACHESZ]; /* nonzero for accepting states */ + unsigned char leftmost; /* leftmost() start, not BOL */ + unsigned char leftbol; /* leftmost() start, w/BOL */ + unsigned char anybol; /* any match start, w/BOL */ + unsigned char nfix; /* number of invariant states */ + unsigned char top; /* next state index available */ + unsigned char flags; /* interesting flags */ + unsigned char trans[CACHESZ][NCHAR]; /* goto table */ +}; + +extern int regtrans(Dfa *, int, w_type, int); + +#endif /* !LIBUXRE_REGDFA_H */ diff --git a/libuxre/regerror.c b/libuxre/regerror.c new file mode 100644 index 0000000..397e3e5 --- /dev/null +++ b/libuxre/regerror.c @@ -0,0 +1,95 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regerror.c 1.4 (gritter) 3/29/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include +#include "re.h" +/* include "_locale.h" */ + +/* #pragma weak regerror = _regerror */ + +size_t +regerror(int err, const regex_t *ep, char *str, size_t max) +{ + const struct + { + int index; + const char *str; + } unk = + { + 88, "unknown regular expression error" + }, msgs[] = + { + /*ENOSYS*/ { 89, "feature not implemented" }, + /*0*/ { 0, "" }, + /*NOMATCH*/ { 90, "regular expression failed to match" }, + /*BADPAT*/ { 91, "invalid regular expression" }, + /*ECOLLATE*/ { 92, "invalid collating element construct" }, + /*ECTYPE*/ { 93, "invalid character class construct" }, + /*EEQUIV*/ { 94, "invalid equivalence class construct" }, + /*EBKTCHAR*/ { 95, "invalid character in '[ ]' construct" }, + /*EESCAPE*/ { 96, "trailing \\ in pattern" }, + /*ESUBREG*/ { 97, "'\\digit' out of range" }, + /*EBRACK*/ { 98, "'[ ]' imbalance" }, + /*EMPTYSUBBKT*/ { 99, "empty nested '[ ]' construct" }, + /*EMPTYPAREN*/ { 100, "empty '\\( \\)' or '( )'" }, + /*NOPAT*/ { 101, "empty pattern" }, + /*EPAREN*/ { 102, "'\\( \\)' or '( )' imbalance" }, + /*EBRACE*/ { 103, "'\\{ \\} or '{ }' imbalance" }, + /*BADBR*/ { 104, "invalid '\\{ \\}' or '{ }'" }, + /*ERANGE*/ { 105, "invalid endpoint in range" }, + /*ESPACE*/ { 106, "out of regular expression memory" }, + /*BADRPT*/ { 107, "invalid *, +, ?, \\{\\} or {} operator" }, + /*BADESC*/ { 108, "invalid escape sequence (e.g. 
\\0)" }, + /*ILLSEQ*/ { 109, "illegal byte sequence"} + }; + const char *p; + size_t len; + int i; + + if (err < REG_ENOSYS || REG_ILLSEQ < err) + { + i = unk.index; + p = unk.str; + } + else + { + i = msgs[err - REG_ENOSYS].index; + p = msgs[err - REG_ENOSYS].str; + } +/* p = __gtxt(_str_uxlibc, i, p); */ + len = strlen(p) + 1; + if (max != 0) + { + if (max > len) + max = len; + else if (max < len) + str[--max] = '\0'; + memcpy(str, p, max); + } + return len; +} diff --git a/libuxre/regex.h b/libuxre/regex.h new file mode 100644 index 0000000..8dbd028 --- /dev/null +++ b/libuxre/regex.h @@ -0,0 +1,153 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regex.h 1.13 (gritter) 2/6/05 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef LIBUXRE_REGEX_H +#define LIBUXRE_REGEX_H +/* from unixsrc:usr/src/common/head/regex.h /main/uw7_nj/1 */ + +#include /* really only want [s]size_t */ + + /* + * Official regexec() flags. 
+ */ +#define REG_NOTBOL 0x000001 /* start of string does not match ^ */ +#define REG_NOTEOL 0x000002 /* end of string does not match $ */ + + /* + * Additional regexec() flags. + */ +#define REG_NONEMPTY 0x000004 /* do not match empty at start of string */ + + /* + * Extensions to provide individual control over each + * of the differences between basic and extended REs. + */ +#define REG_OR 0x0000001 /* enable | operator */ +#define REG_PLUS 0x0000002 /* enable + operator */ +#define REG_QUEST 0x0000004 /* enable ? operator */ +#define REG_BRACES 0x0000008 /* use {m,n} (instead of \{m,n\}) */ +#define REG_PARENS 0x0000010 /* use (...) [instead of \(...\)] */ +#define REG_ANCHORS 0x0000020 /* ^ and $ are anchors anywhere */ +#define REG_NOBACKREF 0x0000040 /* disable \digit */ +#define REG_NOAUTOQUOTE 0x0000080 /* no automatic quoting of REG_BADRPTs */ + + /* + * Official regcomp() flags. + */ +#define REG_EXTENDED (REG_OR | REG_PLUS | REG_QUEST | REG_BRACES | \ + REG_PARENS | REG_ANCHORS | \ + REG_NOBACKREF | REG_NOAUTOQUOTE) +#define REG_ICASE 0x0000100 /* ignore case */ +#define REG_NOSUB 0x0000200 /* only success/fail for regexec() */ +#define REG_NEWLINE 0x0000400 /* take \n as line separator for ^ and $ */ + + /* + * Additional regcomp() flags. + * Some of these assume that int is >16 bits! + * Beware: 0x20000000 and above are used in re.h. 
+ */ +#define REG_ONESUB 0x0000800 /* regexec() only needs pmatch[0] */ +#define REG_MTPARENFAIL 0x0001000 /* take empty \(\) or () as match failure */ +#define REG_MTPARENBAD 0x0002000 /* disallow empty \(\) or () */ +#define REG_BADRANGE 0x0004000 /* accept [m-a] ranges as [ma] */ +#define REG_ODDRANGE 0x0008000 /* oawk oddity: [m-a] means [m] */ +#define REG_SEPRANGE 0x0010000 /* disallow [a-m-z] style ranges */ +#define REG_BKTQUOTE 0x0020000 /* allow \ in []s to quote \, -, ^ or ] */ +#define REG_BKTEMPTY 0x0040000 /* allow empty []s (w/BKTQUOTE, BKTESCAPE) */ +#define REG_ANGLES 0x0080000 /* enable \<, \> operators */ +#define REG_ESCNL 0x0100000 /* take \n as newline character */ +#define REG_NLALT 0x0200000 /* take newline as alternation */ +#define REG_ESCSEQ 0x0400000 /* otherwise, take \ as start of C escapes */ +#define REG_BKTESCAPE 0x0800000 /* allow \ in []s to quote next anything */ +#define REG_NOBRACES 0x1000000 /* disable {n,m} */ +#define REG_ADDITIVE 0x2000000 /* a+*b means + and * additive, ^+ is valid */ +#define REG_NOI18N 0x4000000 /* disable I18N features ([::] etc.) */ +#define REG_OLDESC 0x8000000 /* recognize \b \f \n \r \t \123 only */ +#define REG_AVOIDNULL 0x10000000/* avoid null subexpression matches */ +#define REG_OLDBRE (REG_BADRANGE | REG_ANGLES | REG_ESCNL) +#define REG_OLDERE (REG_OR | REG_PLUS | REG_QUEST | REG_NOBRACES | \ + REG_PARENS | REG_ANCHORS | REG_ODDRANGE | \ + REG_NOBACKREF | REG_ADDITIVE | REG_NOAUTOQUOTE) + + /* + * Error return values. 
+ */ +#define REG_ENOSYS (-1) /* unsupported */ +#define REG_NOMATCH 1 /* regexec() failed to match */ +#define REG_BADPAT 2 /* invalid regular expression */ +#define REG_ECOLLATE 3 /* invalid collating element construct */ +#define REG_ECTYPE 4 /* invalid character class construct */ +#define REG_EEQUIV 5 /* invalid equivalence class construct */ +#define REG_EBKTCHAR 6 /* invalid character in [] construct */ +#define REG_EESCAPE 7 /* trailing \ in pattern */ +#define REG_ESUBREG 8 /* number in \digit invalid or in error */ +#define REG_EBRACK 9 /* [] imbalance */ +#define REG_EMPTYSUBBKT 10 /* empty sub-bracket construct */ +#define REG_EMPTYPAREN 11 /* empty \(\) or () [REG_MTPARENBAD] */ +#define REG_NOPAT 12 /* no (empty) pattern */ +#define REG_EPAREN 13 /* \(\) or () imbalance */ +#define REG_EBRACE 14 /* \{\} or {} imbalance */ +#define REG_BADBR 15 /* contents of \{\} or {} invalid */ +#define REG_ERANGE 16 /* invalid endpoint in expression */ +#define REG_ESPACE 17 /* out of memory */ +#define REG_BADRPT 18 /* *,+,?,\{\} or {} not after r.e. */ +#define REG_BADESC 19 /* invalid escape sequence (e.g. \0) */ +#define REG_ILLSEQ 20 /* illegal byte sequence */ + +typedef struct +{ + size_t re_nsub; /* only advertised member */ + unsigned long re_flags; /* augmented regcomp() flags */ + struct re_dfa_ *re_dfa; /* DFA engine */ + struct re_nfa_ *re_nfa; /* NFA engine */ + struct re_coll_ *re_col; /* current collation info */ + int re_mb_cur_max; /* MB_CUR_MAX acceleration */ + void *re_more; /* just in case... 
*/ +} regex_t; + +typedef ssize_t regoff_t; + +typedef struct +{ + regoff_t rm_so; + regoff_t rm_eo; +} regmatch_t; + +#ifdef __cplusplus +extern "C" { +#endif + +int regcomp(regex_t *, const char *, int); +int regexec(const regex_t *, const char *, size_t, regmatch_t *, int); +size_t regerror(int, const regex_t *, char *, size_t); +void regfree(regex_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* !LIBUXRE_REGEX_H */ diff --git a/libuxre/regexec.c b/libuxre/regexec.c new file mode 100644 index 0000000..667868f --- /dev/null +++ b/libuxre/regexec.c @@ -0,0 +1,68 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regexec.c 1.7 (gritter) 2/6/05 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. 
+ *	59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*	#include "synonyms.h"	*/
+#include "re.h"
+
+/*	#pragma weak regexec = _regexec	*/
+
+int
+regexec(const regex_t *ep, const char *s, size_t n, regmatch_t *mp, int flg)	/* n = number of slots in mp[] */
+{
+	Exec ex;
+	int ret;
+
+	ex.flags = flg | (ep->re_flags & (REG_NEWLINE|REG_ICASE|REG_AVOIDNULL));	/* caller flags plus these three compile-time ones */
+	ex.str = (const unsigned char *)s;
+	ex.match = mp;
+	ex.mb_cur_max = ep->re_mb_cur_max;
+	if ((ex.nmatch = n) != 0) /* impose limits from compile flags */
+	{
+		if (ep->re_flags & REG_NOSUB)
+			n = ex.nmatch = 0;	/* n zeroed too, so mp[] is never written below */
+		else if (ep->re_flags & REG_ONESUB)
+			ex.nmatch = 1;
+		else if (n > ep->re_nsub + 1)
+			ex.nmatch = ep->re_nsub + 1;	/* slot 0 plus one per subexpression */
+	}
+	if (ep->re_flags & REG_DFA && ex.nmatch <= 1)	/* DFA only when at most slot 0 is wanted */
+		ret = libuxre_regdfaexec(ep->re_dfa, &ex);
+	else
+		ret = libuxre_regnfaexec(ep->re_nfa, &ex);
+	/*
+	* Fill unused part of mp[].
+	*/
+	if (ret != 0)
+		ex.nmatch = 0;	/* no match: every slot counts as unused */
+	while (n > ex.nmatch)
+	{
+		n--;
+		mp[n].rm_so = -1;
+		mp[n].rm_eo = -1;
+	}
+	return ret;
+}
diff --git a/libuxre/regfree.c b/libuxre/regfree.c
new file mode 100644
index 0000000..31180d7
--- /dev/null
+++ b/libuxre/regfree.c
@@ -0,0 +1,42 @@
+/*
+ * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
+ *
+ * Sccsid @(#)regfree.c	1.3 (gritter) 9/22/03
+ */
+/*  UNIX(R) Regular Expresssion Library
+ *
+ * Note: Code is released under the GNU LGPL
+ *
+ * Copyright (C) 2001 Caldera International, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to:
+ *	Free Software Foundation, Inc.
+ *	59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*	#include "synonyms.h"	*/
+#include "re.h"
+
+/*	#pragma weak regfree = _regfree	*/
+
+void
+regfree(regex_t *ep)	/* releases what ep refers to; ep itself is not freed and its fields are not cleared */
+{
+	if (ep->re_flags & REG_DFA)	/* the flags, not the pointers, say which engines exist */
+		libuxre_regdeldfa(ep->re_dfa);
+	if (ep->re_flags & REG_NFA)
+		libuxre_regdelnfa(ep->re_nfa);
+	if (ep->re_col != 0)
+		(void)libuxre_lc_collate(ep->re_col);	/* presumably drops the collation reference -- confirm in the collation code */
+}
diff --git a/libuxre/regnfa.c b/libuxre/regnfa.c
new file mode 100644
index 0000000..6953f1f
--- /dev/null
+++ b/libuxre/regnfa.c
@@ -0,0 +1,1070 @@
+/*
+ * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
+ *
+ * Sccsid @(#)regnfa.c	1.8 (gritter) 2/6/05
+ */
+/*  UNIX(R) Regular Expresssion Library
+ *
+ * Note: Code is released under the GNU LGPL
+ *
+ * Copyright (C) 2001 Caldera International, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to:
+ *	Free Software Foundation, Inc.
+ *	59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*	#include "synonyms.h"	*/
+#include	/* NOTE(review): the header names of the four bare #include lines are missing in this copy -- restore before applying */
+#include
+#include "re.h"
+#include
+#include
+
+typedef unsigned char	Uchar;
+typedef unsigned short	Ushort;
+
+/*
+* Nondeterministic Finite Automata.
+*/ +typedef struct t_graph Graph; +struct t_graph +{ + union + { + Graph *ptr; + Info info; + } alt; + Graph *next; + w_type op; +}; + +typedef struct t_stack Stack; +struct t_stack +{ + Stack *link; /* simplifies cleanup */ + Stack *prev; /* covered states */ + Graph *wasgp; /* node associated with this state */ + const Uchar *str; /* saved position in the string */ + Ushort cnt; /* ROP_BRACE: traversal count */ +}; + + /* + * A Context holds all the information needed for each + * potential path through the NFA graph. + */ +typedef struct t_ctxt Context; +struct t_ctxt +{ + Context *link; /* simplifies cleanup */ + Context *next; /* singly linked */ + Stack *sp; /* nested counts */ + Graph *gp; /* starting node */ + Graph *wasgp; /* node associated with this state */ + const Uchar *str; /* saved position in the string */ + Ushort cnt; /* ROP_BRACE: traversal count */ + size_t nset; /* length of rm[] that is currently set */ + regmatch_t rm[1]; /* enough to cover re_nsub+1 (np->rmlen) */ +}; + +struct re_nfa_ /*Nfa*/ +{ + Graph *gp; /* entire NFA */ + Stack *sp; /* unused Stacks */ + Stack *allsp; /* linked Stacks (for cleanup) */ + Context *allcp; /* linked Contexts (for cleanup) */ + Context *cur; /* Contexts to be continued now */ + Context *step; /* Contexts waiting for a step of the NFA */ + Context *avail; /* unused Contexts */ + Context **ecur; /* ends cur list of Contexts */ + Context **estp; /* ends step list of Contexts */ + size_t rmlen; /* length of rm[] in each Context */ + size_t rmmin; /* minimum length needed */ + size_t used; /* length used for this libuxre_regnfaexec() */ + w_type beg; /* nonzero for fixed char initial node NFAs */ +}; + +#define ROP_MTOR ROP_CAT /* ROP_OR, except might be empty loop */ + + /* + * Depth first traversal. + * Make a singly linked list (in alt.ptr) of the graph's nodes. + * Must toss any ROP_BKTs, too, since "alt" is overwritten. 
+	*/
+static void
+deltolist(Graph *gp, Graph **list)	/* pushes every node reached onto *list, linked through alt.ptr */
+{
+	Graph *ptr;
+
+	if ((ptr = gp->next) != 0)	/* first time */
+	{
+		gp->next = 0;	/* doubles as the "already visited" mark */
+		if (gp->op == ROP_OR || gp->op == ROP_MTOR)
+			deltolist(gp->alt.ptr, list);
+		deltolist(ptr, list);
+		if (gp->op == ROP_BKT)	/* release the bracket before alt is overwritten below */
+		{
+			libuxre_bktfree(gp->alt.info.bkt);
+			free(gp->alt.info.bkt);
+		}
+	}
+	else if (gp->op == ROP_END)
+		gp->op = ROP_NOP;	/* so a second arrival at this node takes the return below */
+	else
+		return;	/* next == 0 and not ROP_END: seen before, already listed */
+	gp->alt.ptr = *list;
+	*list = gp;
+}
+
+	/*
+	* After the graph is turned into a linked list,
+	* walk that list freeing the nodes.
+	*/
+static void
+delgraph(Graph *gp)
+{
+	Graph *gp2, end;	/* end: stack sentinel that terminates the list */
+
+	gp2 = &end;
+	deltolist(gp, &gp2);
+	while ((gp = gp2) != &end)
+	{
+		gp2 = gp->alt.ptr;	/* fetch the link before the node is freed */
+		free(gp);
+	}
+}
+
+	/*
+	* Depth first traversal.
+	* Look for ROP_NOPs and prune them from the graph.
+	* Chain them all together on *nop's list.
+	*/
+static Graph *
+nopskip(Graph *gp, Graph **nop)	/* returns the node that should replace gp in the caller's link */
+{
+	Graph *ptr;
+
+	if ((ptr = gp->next) != 0) /* might have yet to do this subgraph */
+	{
+		if (gp->op == ROP_NOP)
+		{
+			if (gp->alt.ptr != 0)	/* touched */
+				return gp->next; /* already did it */
+			gp->alt.ptr = *nop;	/* *nop must start non-null for the "touched" test to work */
+			*nop = gp;
+		}
+		gp->next = 0;	/* this subgraph's pending */
+		if (gp->op == ROP_OR || gp->op == ROP_MTOR)
+			gp->alt.ptr = nopskip(gp->alt.ptr, nop);
+		gp->next = nopskip(ptr, nop);
+		if (gp->op == ROP_NOP)
+			return gp->next;	/* bypass the NOP itself */
+	}
+	return gp;
+}
+
+	/*
+	* Postorder traversal of the parse tree.
+	* Build a graph using "Thompson's" algorithm.
+	* The only significant modification is the
+	* ROP_BRACE->ROP_MTOR construction.
+	* Returns 1 => graph might match empty
+	*         0 => graph cannot match empty
+	*        -1 => error (in allocation)
+	*/
+static int
+mkgraph(Tree *tp, Graph **first, Graph **last)	/* *first/*last are written only on success */
+{
+	Graph *new = 0, *nop, *lf = 0, *ll, *rf = 0, *rl;	/* lf/rf start null: err: tests them */
+	int lmt, rmt = 0;
+
+	if (tp->op != ROP_CAT)
+	{
+		if ((new = calloc(1, sizeof(Graph))) == 0)	/* zeroed: next == 0 until linked */
+			return -1;	/* was 0, which callers read as "cannot match empty" */
+		new->op = tp->op;	/* usually */
+	}
+	switch (tp->op)
+	{
+	case ROP_REF:
+		new->alt.info.sub = tp->right.info.sub;
+		*first = new;
+		*last = new;
+		return 1;	/* safe--can't really tell */
+	case ROP_BKT:
+		tp->op = ROP_BKTCOPY;	/* now graph owns clean up */
+		/*FALLTHROUGH*/
+	case ROP_BKTCOPY:
+		new->alt.info.bkt = tp->right.info.bkt;
+		/*FALLTHROUGH*/
+	default:
+		*first = new;
+		*last = new;
+		return 0;
+	case ROP_EMPTY:
+		new->op = ROP_NOP;
+		new->alt.ptr = 0;	/* untouched */
+		*first = new;
+		*last = new;
+		return 1;
+	case ROP_OR:
+	case ROP_CAT:
+		lf = 0;	/* in case of error */
+		if ((rmt = mkgraph(tp->right.ptr, &rf, &rl)) < 0)
+			goto err;
+		/*FALLTHROUGH*/
+	case ROP_STAR:
+	case ROP_PLUS:
+	case ROP_QUEST:
+	case ROP_BRACE:
+	case ROP_LP:
+		if ((lmt = mkgraph(tp->left.ptr, &lf, &ll)) < 0)
+			goto err;
+		break;
+	}
+	/*
+	* Note that ROP_NOP only serves as the node that reconnects
+	* the two choices of an incoming ROP_OR or ROP_QUEST.  To
+	* prevent rewalking portions of the graph in nopskip(),
+	* this code marks all ROP_NOP nodes as currently untouched.
+	*/
+	switch (tp->op)
+	{
+	case ROP_OR:
+		if ((nop = calloc(1, sizeof(Graph))) == 0)
+			goto err;
+		nop->op = ROP_NOP;
+		nop->alt.ptr = 0;	/* untouched */
+		ll->next = nop;
+		rl->next = nop;
+		new->next = lf;
+		new->alt.ptr = rf;
+		*first = new;
+		*last = nop;
+		return lmt | rmt;	/* empty if either branch can be */
+	case ROP_CAT:	/* no "new" */
+		ll->next = rf;
+		*first = lf;
+		*last = rl;
+		return lmt & rmt;	/* empty only if both halves can be */
+	case ROP_QUEST:
+		if ((nop = calloc(1, sizeof(Graph))) == 0)
+			goto err;
+		nop->op = ROP_NOP;
+		nop->alt.ptr = 0;	/* untouched */
+		new->op = ROP_OR;	/* x? is built as (x | nothing) */
+		new->next = lf;
+		new->alt.ptr = nop;
+		ll->next = nop;
+		*first = new;
+		*last = nop;
+		return 1;
+	case ROP_STAR:
+		*first = new;
+		rmt = 1;
+	star:;
+		new->op = lmt ? ROP_MTOR : ROP_OR;	/* MTOR when the loop body may be empty */
+		new->alt.ptr = lf;
+		ll->next = new;
+		*last = new;
+		return rmt;
+	case ROP_PLUS:
+		*first = lf;	/* enter through the body: at least one pass */
+		rmt = lmt;
+		goto star;
+	case ROP_BRACE:
+		if ((nop = calloc(1, sizeof(Graph))) == 0)
+			goto err;
+		nop->op = ROP_MTOR;	/* going to save state anyway... */
+		nop->alt.ptr = lf;
+		ll->next = new;
+		new->next = nop;
+		new->alt.info.num[1] = tp->right.info.num[1];
+		if ((new->alt.info.num[0] = tp->right.info.num[0]) == 0)
+		{
+			lmt = 1;
+			*first = new;
+		}
+		else
+		{
+			new->alt.info.num[0]--;	/* already done 1 */
+			if (new->alt.info.num[1] != BRACE_INF)
+				new->alt.info.num[1]--;	/* likewise */
+			*first = lf;
+		}
+		*last = nop;
+		return lmt;
+	case ROP_LP:
+		if ((nop = calloc(1, sizeof(Graph))) == 0)
+			goto err;
+		nop->op = ROP_RP;
+		nop->alt.info.sub = tp->right.info.sub;
+		new->alt.info.sub = tp->right.info.sub;
+		new->next = lf;
+		ll->next = nop;
+		*first = new;
+		*last = nop;
+		return lmt;
+	}
+err:;	/* a subgraph's last node has next == 0, so delgraph() leaves it alone: small leak, no wild read */
+	if (KIND_ROP(tp->op) == BINARY_ROP && rf != 0)
+		delgraph(rf);
+	if (lf != 0)
+		delgraph(lf);
+	if (tp->op != ROP_CAT)
+		free(new);	/* new is never linked into lf/rf before a failure */
+	return -1;
+}
+
+	/*
+	* Semi-preorder traversal.
+	* Return zero if there's no simple first character
+	* (including the operation ROP_BOL) that must always
+	* be at the start of a matching string.
+ * This code doesn't attempt to get an answer if the + * first of the tree many be empty. + */ +static w_type +firstop(Tree *tp) +{ + w_type op; + + switch (tp->op) + { + case ROP_OR: + if ((op = firstop(tp->left.ptr)) == 0 + || op != firstop(tp->right.ptr)) + { + return 0; + } + return op; + case ROP_BRACE: + if (tp->right.info.num[0] == 0) + return 0; + /*FALLTHROUGH*/ + case ROP_CAT: + case ROP_PLUS: + case ROP_LP: + return firstop(tp->left.ptr); + default: + if (tp->op < 0) + return 0; + /*FALLTHROUGH*/ + case ROP_BOL: + return tp->op; + } +} + +void +libuxre_regdelnfa(Nfa *np) +{ + Context *cp, *cpn; + Stack *sp, *spn; + + if (np->gp != 0) + delgraph(np->gp); + for (cp = np->allcp; cp != 0; cp = cpn) + { + cpn = cp->link; + free(cp); + } + for (sp = np->allsp; sp != 0; sp = spn) + { + spn = sp->link; + free(sp); + } + free(np); +} + +LIBUXRE_STATIC int +libuxre_regnfacomp(regex_t *ep, Tree *tp, Lex *lxp) +{ + Graph *gp, end; + Nfa *np; + + if ((np = malloc(sizeof(Nfa))) == 0) + goto err; + np->gp = 0; /* in case of error */ + if (mkgraph(tp, &np->gp, &gp) < 0) + goto err; + gp->next = 0; /* nothing follows ROP_END */ + np->rmlen = 0; + if ((ep->re_flags & REG_NOSUB) == 0) + np->rmlen = ep->re_nsub + 1; + np->rmmin = 0; + if (lxp->maxref != 0 && (np->rmmin = lxp->maxref + 1) > np->rmlen) + np->rmlen = np->rmmin; + /* + * Delete all ROP_NOPs from the graph. + * nopskip() disconnects them from the graph and + * links them together through their alt.ptr's. 
+ */ + gp = &end; + np->gp = nopskip(np->gp, &gp); + while (gp != &end) + { + Graph *gp2 = gp; + + gp = gp->alt.ptr; + free(gp2); + } + np->sp = 0; + np->allsp = 0; + np->avail = 0; + np->allcp = 0; + ep->re_nfa = np; + np->beg = firstop(tp); + return 0; +err:; + if (np != 0) + { + if (np->gp != 0) + delgraph(np->gp); + free(np); + } + return REG_ESPACE; +} + +static Stack * +newstck(Nfa *np) +{ + Stack *sp, **spp; + int i; + + if ((sp = np->sp) == 0) /* get more */ + { + spp = &np->sp; + i = 4; + while ((sp = malloc(sizeof(Stack))) != 0) + { + sp->link = np->allsp; + np->allsp = sp; + *spp = sp; + spp = &sp->prev; + if (--i == 0) + break; + } + *spp = 0; + if ((sp = np->sp) == 0) /* first malloc failed */ + return 0; + } + np->sp = sp->prev; + return sp; +} + +static int +mkstck(Nfa *np, Context *cp, Graph *gp) +{ + Stack *new, *sp; + + if (gp == 0) /* copy existing stack tail */ + { + /* + * Hoist up top of stack. + */ + new = cp->sp; + cp->wasgp = new->wasgp; + cp->str = new->str; + cp->cnt = new->cnt; + cp->sp = new->prev; + if ((sp = new->prev) == 0) /* only one below */ + { + new->prev = np->sp; + np->sp = new; + cp->sp = 0; + return 0; + } + for (;;) /* copy the rest; reusing the old top */ + { + new->wasgp = sp->wasgp; + new->str = sp->str; + new->cnt = sp->cnt; + if ((new->prev = sp->prev) == 0) + break; + if ((new->prev = newstck(np)) == 0) + return REG_ESPACE; + new = new->prev; + sp = sp->prev; + } + return 0; + } + if (cp->wasgp != 0) /* push current down */ + { + if ((new = newstck(np)) == 0) + return REG_ESPACE; + new->prev = cp->sp; + cp->sp = new; + new->wasgp = cp->wasgp; + new->str = cp->str; + new->cnt = cp->cnt; + } + cp->wasgp = gp; + cp->str = 0; + cp->cnt = 0; + return 0; +} + + /* + * Allocate a new Context (from np->avail) + * and add it to the end of the current list. 
+ */ +static int +newctxt(Nfa *np, Context *cp, Graph *gp) +{ + Context *new; + size_t n; + + if ((new = np->avail) == 0) /* need more */ + { + Context *ncp, **cpp; + int i; + + /* + * Can't easily allocate Contexts in one call because + * the alignments (given the varying length of rm[]) + * are potentially nontrivial. + */ + n = offsetof(Context, rm) + np->rmlen * sizeof(regmatch_t); + i = 4; + cpp = &np->avail; + while ((ncp = malloc(n)) != 0) + { + ncp->link = np->allcp; + np->allcp = ncp; + *cpp = ncp; + cpp = &ncp->next; + if (--i == 0) + break; + } + *cpp = 0; + if ((new = np->avail) == 0) /* first malloc failed */ + return REG_ESPACE; + } + np->avail = new->next; + new->next = 0; + new->gp = gp; + new->sp = 0; + new->wasgp = 0; + new->nset = 0; + if (cp != 0) /* copy existing context information */ + { + if (cp->sp != 0) /* copy tail of stack */ + { + new->sp = cp->sp; + if (mkstck(np, new, 0) != 0) + return REG_ESPACE; + } + new->wasgp = cp->wasgp; + new->str = cp->str; + new->cnt = cp->cnt; + /* + * Copy any valid subexpression match information + * from the existing context. + */ + if (np->used != 0 && (n = cp->nset) != 0) + { + regmatch_t *rmn = new->rm, *rmo = cp->rm; + + new->nset = n; + for (;; ++rmn, ++rmo) + { + rmn->rm_so = rmo->rm_so; + rmn->rm_eo = rmo->rm_eo; + if (--n == 0) + break; + } + } + } + /* + * Append it to the end of the current Context list. + */ + *np->ecur = new; + np->ecur = &new->next; + return 0; +} + + /* + * Compare two byte string sequences for equality. + * If REG_ICASE, walk through the strings doing + * caseless comparisons of the wide characters. 
+ */ +static int +casecmp(const Uchar *s, Exec *xp, ssize_t i, ssize_t n, int mb_cur_max) +{ + const Uchar *p = &xp->str[i]; + const Uchar *end; + w_type wc1, wc2; + int k; + + if (strncmp((char *)s, (char *)p, n) == 0) /* try for exact match */ + return 1; + if ((xp->flags & REG_ICASE) == 0) + return 0; + /* + * Walk through each testing for a match, ignoring case, + * of the resulting wide characters. + * Note that only "s" can run out of characters. + */ + end = &p[n]; + do + { + if ((wc1 = *s++) == '\0') + return 0; + if (!ISONEBYTE(wc1) && (k = libuxre_mb2wc(&wc1, s)) > 0) + s += k; + if (!ISONEBYTE(wc2 = *p++) && (k = libuxre_mb2wc(&wc2, p)) > 0) + p += k; + if (wc1 != wc2) + { + wc1 = to_lower(wc1); + wc2 = to_lower(wc2); + if (wc1 != wc2) + return 0; + } + } while (p < end); + return 1; +} + +LIBUXRE_STATIC int +libuxre_regnfaexec(Nfa *np, Exec *xp) +{ + const Uchar *s, *s1, *s2; + Context *cp, *cpn; + Graph *gp, *brace; + Stack *sp, *spn; + ssize_t rmso, len; + int i, ret, mb_cur_max; + w_type wc; + size_t n; + + ret = 0; /* assume it matches */ + rmso = -1; /* but no match yet */ + np->cur = 0; + np->step = 0; + np->ecur = &np->cur; + np->estp = &np->step; + if ((np->used = xp->nmatch) < np->rmmin) + np->used = np->rmmin; + s1 = 0; /* one char back */ + s = xp->str; /* current high water in string */ + mb_cur_max = xp->mb_cur_max; + for (;;) + { + /* + * Get next character from string. + * If the engine proper hasn't started and the engine + * requires a particular character to start and this + * character isn't it, try the next one. 
+ */ + for (;;) + { + s2 = s1; + s1 = s; + if (!ISONEBYTE(wc = *s++) && + (i = libuxre_mb2wc(&wc, s)) > 0) + s += i; + if (np->cur != 0 || np->beg == wc || np->beg == 0) + break; + if (np->beg == ROP_BOL) + { + if (s2 == 0 && (xp->flags & REG_NOTBOL) == 0) + break; + if ((xp->flags & REG_NEWLINE) == 0) + goto nomatch; + if (s2 != 0 && *s2 == '\n') + break; + } + if (wc == '\0') + goto nomatch; + } + /* + * Start the engine by inserting a fresh initial context + * if there's no known match as yet. (Once some match + * has been found, the end is near.) + */ + if (rmso < 0 && newctxt(np, 0, np->gp) != 0) + goto err; + /* + * Walk the current Contexts list, trying each. + * "loop" is when a new Context is to be tried, + * "again" is when the same Context continues, + * but wc was not yet matched. + */ + cp = np->cur; + loop:; + gp = cp->gp; + again:; + switch (gp->op) + { + case ROP_BRACE: /* gp->next->op == ROP_MTOR */ + brace = gp; + gp = gp->next; + goto mtor; + case ROP_MTOR: + brace = 0; + mtor:; + if (cp->wasgp != gp) /* first time */ + { + if (mkstck(np, cp, gp) != 0) + goto err; + } + else if (cp->str == s) /* spinning */ + goto poptonext; + cp->str = s; + if (brace != 0) + { + if (cp->cnt >= brace->alt.info.num[1]) + goto poptonext; + if (++cp->cnt <= brace->alt.info.num[0]) + { + gp = gp->alt.ptr; + goto again; + } + if (cp->cnt > BRACE_MAX) + cp->cnt = BRACE_MAX; + } + if (newctxt(np, cp, gp->alt.ptr) != 0) + goto err; + poptonext:; + cp->wasgp = 0; + if ((sp = cp->sp) != 0) /* pop stack */ + { + cp->sp = sp->prev; + cp->wasgp = sp->wasgp; + cp->str = sp->str; + cp->cnt = sp->cnt; + sp->prev = np->sp; + np->sp = sp; + } + /*FALLTHROUGH*/ + case ROP_EMPTY: + tonext:; + gp = gp->next; + goto again; + case ROP_OR: + if (newctxt(np, cp, gp->alt.ptr) != 0) + goto err; + goto tonext; + case ROP_LP: + if ((n = gp->alt.info.sub) < np->used) + { + size_t k; + + cp->rm[n].rm_so = s1 - xp->str; + cp->rm[n].rm_eo = -1; + /* + * Mark any skipped subexpressions as + * 
failing to participate in the match. + */ + if ((k = cp->nset) < n) + { + regmatch_t *rmp = &cp->rm[k]; + + for (;; rmp++) + { + rmp->rm_so = -1; + rmp->rm_eo = -1; + if (++k >= n) + break; + } + } + cp->nset = n + 1; + } + goto tonext; + case ROP_RP: + if ((n = gp->alt.info.sub) < np->used) + cp->rm[n].rm_eo = s1 - xp->str; + goto tonext; + case ROP_BOL: + if (s2 == 0) + { + if (xp->flags & REG_NOTBOL) + goto failed; + } + else if ((xp->flags & REG_NEWLINE) == 0 || *s2 != '\n') + goto failed; + goto tonext; + case ROP_EOL: + if (wc == '\0') + { + if (xp->flags & REG_NOTEOL) + goto failed; + } + else if ((xp->flags & REG_NEWLINE) == 0 || wc != '\n') + goto failed; + goto tonext; + default: /* character match */ + if (gp->op != wc) + { + if ((xp->flags & REG_ICASE) == 0 + || gp->op != to_lower(wc)) + { + goto failed; + } + } + nextwc:; + cp->gp = gp->next; + tostep:; + cpn = cp->next; + cp->next = 0; + *np->estp = cp; + np->estp = &cp->next; + if ((cp = cpn) == 0) + break; + goto loop; + case ROP_NOTNL: + if (wc == '\n') + goto failed; + /*FALLTHROUGH*/ + case ROP_ANYCH: + if (wc > '\0') + goto nextwc; + /*FALLTHROUGH*/ + case ROP_NONE: + failed:; + cpn = cp->next; + cp->next = np->avail; + np->avail = cp; + if ((cp = cpn) == 0) + break; + goto loop; + case ROP_LT: + if (s2 == 0) + { + if (xp->flags & REG_NOTBOL) + goto failed; + } + else + { + w_type pwc; + + if (wc != '_' && + !iswalnum(mb_cur_max == 1 ? btowc(wc) : wc)) + goto failed; + if (!ISONEBYTE(pwc = *s2)) + libuxre_mb2wc(&pwc, &s2[1]); + if (pwc == '_' || + iswalnum(mb_cur_max== 1 ? btowc(pwc) : pwc)) + goto failed; + } + goto tonext; + case ROP_GT: + if (wc == '_' || + iswalnum(mb_cur_max == 1 ? 
btowc(wc) : wc)) + goto failed; + goto tonext; + case ROP_BKT: + case ROP_BKTCOPY: + if (cp->wasgp == gp) /* rest of MCCE */ + { + checkspin:; + if (s1 >= cp->str) /* got it all */ + goto poptonext; + goto tostep; + } + if ((i = libuxre_bktmbexec(gp->alt.info.bkt, wc, s, + mb_cur_max)) < 0) + goto failed; + if ((n = i) == 0) /* only matched wc */ + goto nextwc; + spin:; + if (mkstck(np, cp, gp) != 0) + goto err; + cp->gp = gp; /* stay here until reach past s+n */ + cp->str = s + n; + goto tostep; + case ROP_REF: + if (cp->wasgp == gp) /* rest of matched string */ + goto checkspin; + if ((n = gp->alt.info.sub) >= cp->nset) + goto failed; + if ((len = cp->rm[n].rm_eo) < 0) + goto failed; + if ((len -= n = cp->rm[n].rm_so) == 0) + goto tonext; + if (casecmp(s1, xp, n, len, mb_cur_max) == 0) + goto failed; + if ((n = s - s1) >= len) + goto nextwc; + n = len - n; + goto spin; + case ROP_END: /* success! */ + if (xp->flags & REG_NONEMPTY) + { + if (s2 == 0) + goto failed; + } + if (xp->nmatch == 0) + goto match; + /* + * Mark any skipped subexpressions as failing to match. + */ + if ((n = cp->nset) < xp->nmatch) + { + do + { + cp->rm[n].rm_so = -1; + cp->rm[n].rm_eo = -1; + } while (++n < xp->nmatch); + } + /* + * Note the left-most match that's longest. + */ + n = cp->rm[0].rm_so; + if (rmso < 0 || n < rmso) + { + rmso = n; + record:; + memcpy(xp->match, cp->rm, + xp->nmatch * sizeof(regmatch_t)); + goto failed; + } + if (rmso < n || xp->match[0].rm_eo > cp->rm[0].rm_eo) + goto failed; + if (xp->match[0].rm_eo < cp->rm[0].rm_eo) + goto record; +#if 0 /* maximize the lengths of earlier LP...RPs */ + /* + * If both are of the same length and start + * at the same point, choose the one with + * a "longest submatch from left to right" + * where an empty string wins over a nonmatch. + */ + for (n = 1; n < xp->nmatch; n++) + { + ssize_t nlen; + + /* + * First, go with the choice that has any + * match for subexpr n. 
+ */ + len = xp->match[n].rm_eo; + nlen = cp->rm[n].rm_eo; + if (nlen < 0) + { + if (len >= 0) + break; + } + else if (len < 0) + goto record; + /* + * Both have a match; go with the longer. + */ + len -= xp->match[n].rm_so; + nlen -= cp->rm[n].rm_so; + if (nlen < len) + break; + if (nlen > len) + goto record; + } +#else /* take LP and RP as "fence posts" and maximize earlier gaps */ + /* + * If both are of the same length and start + * at the same point, choose the one with + * the larger earlier subpatterns, in which + * each rm_so and rm_eo serves as a separator. + */ + for (n = 1; n < xp->nmatch; n++) + { + ssize_t nlen; + int use; + + if (xp->flags & REG_AVOIDNULL) { + /* + * This is to to satisfy POSIX.1-2001 + * XBD pp. 172-173 ll. 6127-6129, whose + * translation is "do not match null + * expressions if there is a choice". + * See also POSIX.2 interpretation #43 + * in which the question was raised. + * + * The first subexpression of "\(x*\)*" + * must thus match the string "xxx". + */ + use = cp->rm[n].rm_eo - + cp->rm[n].rm_so >= + xp->match[n].rm_eo - + xp->match[n].rm_so || + xp->match[n].rm_so < 0; + } else + use = 1; + /* + * Choose the rightmost ROP_LP as that + * maximizes the gap from before. + */ + len = xp->match[n].rm_so; + nlen = cp->rm[n].rm_so; + if (len < nlen && use) + goto record; + if (len > nlen) + break; + /* + * The ROP_LPs are at the same point: + * Choose the rightmost ROP_RP. + */ + len = xp->match[n].rm_eo; + nlen = cp->rm[n].rm_eo; + if (len < nlen && use) + goto record; + if (len > nlen) + break; + } +#endif + goto failed; + } + /* + * Finished the current Context list. If the input string + * has been entirely scanned, we're done. Otherwise, make + * the next step list current for the next character. + * If the next step list was empty and there's an existing + * match, that's the left-most longest. 
+ */ + if (wc == '\0') + { + if (rmso >= 0) + goto match; + goto nomatch; + } + np->ecur = np->estp; + if ((np->cur = np->step) == 0) + { + if (rmso >= 0) + goto match; + np->ecur = &np->cur; /* was pointing at step */ + } + np->step = 0; + np->estp = &np->step; + } +nomatch:; + ret = REG_NOMATCH; +match:; + np->avail = 0; + for (cp = np->allcp; cp != 0; cp = cpn) + { + cpn = cp->link; + cp->next = np->avail; + np->avail = cp; + } + np->sp = 0; + for (sp = np->allsp; sp != 0; sp = spn) + { + spn = sp->link; + sp->prev = np->sp; + np->sp = sp; + } + return ret; +err:; + ret = REG_ESPACE; + goto match; +} diff --git a/libuxre/regparse.c b/libuxre/regparse.c new file mode 100644 index 0000000..0a5c6b2 --- /dev/null +++ b/libuxre/regparse.c @@ -0,0 +1,1091 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regparse.c 1.12 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. 
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include +#include +#include "re.h" + +LIBUXRE_STATIC void +libuxre_regdeltree(Tree *tp, int all) +{ + if (tp == 0) + return; + if (tp->op < 0) + { + switch (KIND_ROP(tp->op)) + { + case BINARY_ROP: + libuxre_regdeltree(tp->right.ptr, all); + /*FALLTHROUGH*/ + case UNARY_ROP: + libuxre_regdeltree(tp->left.ptr, all); + break; + default: + if (tp->op == ROP_BKT && all) + { + libuxre_bktfree(tp->right.info.bkt); + free(tp->right.info.bkt); + } + break; + } + } + free(tp); +} + +LIBUXRE_STATIC Tree * +libuxre_reg1tree(w_type op, Tree *lp) +{ + Tree *tp; + + if ((tp = malloc(sizeof(Tree))) == 0) + { + if (lp != 0) + libuxre_regdeltree(lp, 1); + return 0; + } + tp->op = op; + tp->left.ptr = lp; + if (lp != 0) + lp->parent = tp; + return tp; +} + +LIBUXRE_STATIC Tree * +libuxre_reg2tree(w_type op, Tree *lp, Tree *rp) +{ + Tree *tp; + + if ((tp = malloc(sizeof(Tree))) == 0) + { + libuxre_regdeltree(lp, 1); + libuxre_regdeltree(rp, 1); + return 0; + } + tp->op = op; + tp->left.ptr = lp; + lp->parent = tp; + tp->right.ptr = rp; + rp->parent = tp; + return tp; +} + +static int +lex(Lex *lxp) +{ + size_t num; + w_type wc; + int n, mb_cur_max; + + mb_cur_max = lxp->mb_cur_max; +nextc: switch (wc = *lxp->pat++) /* interesting ones are single bytes */ + { + case '\0': + lxp->pat--; /* continue to report ROP_END */ + wc = ROP_END; + break; + case '(': + if (lxp->flags & REG_PARENS) + { + leftparen:; + /* + * Must keep track of the closed and + * yet-to-be closed groups as a list. + * Consider (()a(()b(()c(()d... in which + * at each letter another even-numbered + * group is made available, but no + * odd-numbered ones are. 
+ */ + if ((lxp->flags & REG_NOBACKREF) == 0) + { + if (lxp->nleft >= lxp->nclist) /* grow it */ + { + unsigned char *p; + + lxp->nclist += 8; /* arbitrary */ + if ((p = realloc(lxp->clist, + lxp->nclist)) == 0) + { + lxp->err = REG_ESPACE; + return -1; + } + lxp->clist = p; + } + lxp->clist[lxp->nleft] = 0; /* unavailable */ + } + lxp->nleft++; + wc = ROP_LP; + } + break; + case ')': + /* + * For REG_PARENS, only take a right paren as a close + * if there is a matching left paren. + */ + if (lxp->flags & REG_PARENS && lxp->nright < lxp->nleft) + { + lxp->nright++; + rightparen:; + /* + * The group that is being closed is the highest + * numbered as-yet-unclosed group. + */ + if ((lxp->flags & REG_NOBACKREF) == 0) + { + num = lxp->nleft; + while (lxp->clist[--num] != 0) + ; + lxp->clist[num] = 1; + } + wc = ROP_RP; + } + break; + case '.': + wc = ROP_ANYCH; + if (lxp->flags & REG_NEWLINE) + wc = ROP_NOTNL; + break; + case '*': + if (lxp->flags & REG_ADDITIVE) + { + nxtstar: switch (*lxp->pat) + { + case '+': + if ((lxp->flags & REG_PLUS) == 0) + break; + lxp->pat++; + goto nxtstar; + case '?': + if ((lxp->flags & REG_QUEST) == 0) + break; + /*FALLTHRU*/ + case '*': + lxp->pat++; + goto nxtstar; + } + } + wc = ROP_STAR; + break; + case '^': + /* + * Look "behind" to see if this is an anchor. + * Take it as an anchor if it follows an alternation + * operator. (lxp->tok is initially set to ROP_OR.) + */ + if (lxp->flags & REG_ANCHORS || lxp->tok == ROP_OR) { + if (lxp->flags & REG_ADDITIVE) + { + int optional = 0; + + nxtcar: switch (*lxp->pat) + { + case '+': + if ((lxp->flags & REG_PLUS) == 0) + break; + lxp->pat++; + goto nxtcar; + case '?': + if ((lxp->flags & REG_QUEST) == 0) + break; + /*FALLTHRU*/ + case '*': + optional = 1; + lxp->pat++; + goto nxtcar; + } + if (optional) + goto nextc; + } + wc = ROP_BOL; + } + break; + case '$': + /* + * Look ahead to see if this is an anchor, + * unless any '$' is an anchor. 
+ * Take it as an anchor if it occurs just before + * the pattern end or an alternation operator. + */ + if (lxp->flags & REG_ANCHORS || *lxp->pat == '\0' + || (lxp->flags & REG_OR && *lxp->pat == '|') + || (lxp->flags & REG_NLALT && *lxp->pat == '\n')) + { + if (lxp->flags & REG_ADDITIVE) + { + int optional = 0; + + nxtdol: switch (*lxp->pat) + { + case '+': + if ((lxp->flags & REG_PLUS) == 0) + break; + lxp->pat++; + goto nxtdol; + case '?': + if ((lxp->flags & REG_QUEST) == 0) + break; + /*FALLTHRU*/ + case '*': + optional = 1; + lxp->pat++; + goto nxtdol; + } + if (optional) + goto nextc; + } + wc = ROP_EOL; + } + break; + case '+': + if (lxp->flags & REG_PLUS) + { + wc = ROP_PLUS; + if (lxp->flags & REG_ADDITIVE) + { + nxtplus: switch (*lxp->pat) + { + case '?': + if ((lxp->flags & REG_QUEST) == 0) + break; + case '*': + wc = ROP_STAR; + /*FALLTHRU*/ + case '+': + lxp->pat++; + goto nxtplus; + } + } + } + break; + case '?': + if (lxp->flags & REG_QUEST) + { + wc = ROP_QUEST; + if (lxp->flags & REG_ADDITIVE) + { + nxtquest: switch (*lxp->pat) + { + case '+': + if ((lxp->flags & REG_PLUS) == 0) + break; + case '*': + wc = ROP_STAR; + /*FALLTHRU*/ + case '?': + lxp->pat++; + goto nxtquest; + } + } + } + break; + case '\n': + if (lxp->flags & REG_NLALT) + { + /* + * Even when newline is an alternative separator, + * it doesn't permit parenthesized subexpressions + * to include it. 
+ */ + if (lxp->nleft != lxp->nright) + { + lxp->err = REG_EPAREN; + return -1; + } + wc = ROP_OR; + } + else if (lxp->flags & REG_NEWLINE) + lxp->flags |= REG_NFA; + break; + case '|': + if (lxp->flags & REG_OR) + wc = ROP_OR; + break; + case '[': + if ((lxp->info.bkt = malloc(sizeof(Bracket))) == 0) + { + lxp->err = REG_ESPACE; + return -1; + } + if ((lxp->flags & REG_GOTBKT) == 0) /* first time */ + { + struct lc_collate *col; + + lxp->flags |= REG_GOTBKT; + lxp->bktflags = 0; + if (lxp->flags & REG_ICASE) + lxp->bktflags |= BKT_ONECASE; + if (lxp->flags & REG_NEWLINE) + lxp->bktflags |= BKT_NOTNL; + if (lxp->flags & REG_BADRANGE) + lxp->bktflags |= BKT_BADRANGE; + if (lxp->flags & REG_ODDRANGE) + lxp->bktflags |= BKT_ODDRANGE; + if (lxp->flags & REG_SEPRANGE) + lxp->bktflags |= BKT_SEPRANGE; + if (lxp->flags & REG_BKTQUOTE) + lxp->bktflags |= BKT_QUOTE; + if (lxp->flags & REG_BKTEMPTY) + lxp->bktflags |= BKT_EMPTY; + if (lxp->flags & REG_ESCNL) + lxp->bktflags |= BKT_ESCNL; + if (lxp->flags & REG_NLALT) + lxp->bktflags |= BKT_NLBAD; + if (lxp->flags & REG_ESCSEQ) + lxp->bktflags |= BKT_ESCSEQ; + if (lxp->flags & REG_BKTESCAPE) + lxp->bktflags |= BKT_ESCAPE; + if (lxp->flags & REG_NOI18N) + lxp->bktflags |= BKT_NOI18N; + if (lxp->flags & REG_OLDESC) + lxp->bktflags |= BKT_OLDESC; + if ((col = libuxre_lc_collate(0)) != 0) + { + if (col->maintbl == 0 + || col->flags & CHF_ENCODED) + { + (void)libuxre_lc_collate(col); + col = 0; + } + else if (col->flags & CHF_MULTICH) + lxp->flags |= REG_NFA; + } + lxp->col = col; + } + n = lxp->bktflags; + if (*lxp->pat == '^') + { + n |= BKT_NEGATED; + lxp->pat++; + } + lxp->info.bkt->col = lxp->col; + if ((n = libuxre_bktmbcomp(lxp->info.bkt, lxp->pat, + n, mb_cur_max)) < 0) + { + free(lxp->info.bkt); + lxp->err = -n; /* convert to REG_* errors */ + return -1; + } + /* + * NFA forced if newline can be a match and REG_NEWLINE is set. 
+ */ + if ((lxp->flags & (REG_NFA | REG_NEWLINE)) == REG_NEWLINE + && lxp->pat[-1] == '[' /* i.e., not BKT_NEGATED */ + && libuxre_bktmbexec(lxp->info.bkt, '\n', 0, 1) == 0) + { + lxp->flags |= REG_NFA; + } + lxp->pat += n; + wc = ROP_BKT; + break; + case '{': + if (lxp->flags & REG_NOBRACES || (lxp->flags & REG_BRACES) == 0) + break; + interval:; + if (!isdigit(num = *lxp->pat)) + { + badbr:; + lxp->err = REG_BADBR; + if (*lxp->pat == '\0') + lxp->err = REG_EBRACE; /* more accurate */ + return -1; + } + num -= '0'; + while (isdigit(wc = *++lxp->pat)) + { + num *= 10; + if ((num += wc - '0') > BRACE_MAX) + goto badbr; + } + lxp->info.num[0] = num; + lxp->info.num[1] = num; + if (wc == ',') + { + lxp->info.num[1] = BRACE_INF; + if (isdigit(wc = *++lxp->pat)) + { + num = wc - '0'; + while (isdigit(wc = *++lxp->pat)) + { + num *= 10; + if ((num += wc - '0') > BRACE_MAX) + goto badbr; + } + if (num < lxp->info.num[0]) + goto badbr; + lxp->info.num[1] = num; + } + } + if ((lxp->flags & REG_BRACES) == 0) + { + if (wc != '\\') + goto badbr; + wc = *++lxp->pat; + } + if (wc != '}') + goto badbr; + lxp->pat++; + wc = ROP_BRACE; + /* + * Replace interval with simpler equivalents where possible, + * even when the operators are not otherwise available. + */ + if (lxp->info.num[1] <= 1) + { + if (lxp->info.num[0] == 1) + wc = ROP_NOP; /* {1,1} is noise */ + else if (lxp->info.num[1] == 0) + wc = ROP_EMPTY; /* {0,0} is empty string */ + else + wc = ROP_QUEST; /* {0,1} is ? 
*/ + } + else if (lxp->info.num[1] == BRACE_INF) + { + if (lxp->info.num[0] == 0) + wc = ROP_STAR; + else if (lxp->info.num[0] == 1) + wc = ROP_PLUS; + else if (lxp->info.num[0] > BRACE_DFAMAX) + lxp->flags |= REG_NFA; + } + else if (lxp->info.num[1] > BRACE_DFAMAX) + { + lxp->flags |= REG_NFA; + } + break; + case '\\': + switch (wc = *lxp->pat++) + { + case '\0': + lxp->err = REG_EESCAPE; + return -1; + case '<': + if (lxp->flags & REG_ANGLES) + { + lxp->flags |= REG_NFA; + wc = ROP_LT; + } + goto out; + case '>': + if (lxp->flags & REG_ANGLES) + { + lxp->flags |= REG_NFA; + wc = ROP_GT; + } + goto out; + case '(': + if ((lxp->flags & REG_PARENS) == 0) + goto leftparen; + goto out; + case ')': + if ((lxp->flags & REG_PARENS) == 0) + { + if (++lxp->nright > lxp->nleft) + { + lxp->err = REG_EPAREN; + return -1; + } + goto rightparen; + } + goto out; + case '{': + if (lxp->flags & (REG_BRACES|REG_NOBRACES)) + goto out; + goto interval; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + num = wc - '0'; + if ((lxp->flags & REG_NOBACKREF) == 0) + { + backref:; + if (num > lxp->nleft + || lxp->clist[num - 1] == 0) + { + lxp->err = REG_ESUBREG; + return -1; + } + lxp->info.sub = num; + if (lxp->maxref < num) + lxp->maxref = num; + lxp->flags |= REG_NFA; + wc = ROP_REF; + goto out; + } + /* + * For compatibility (w/awk), permit "octal" 8 and 9. + * Already have the value of the first digit in num. + * + * If REG_OLDESC, exactly three digits must be present. 
+ */ + tryoctal:; + if ((lxp->flags & REG_ESCSEQ) == 0) + goto out; + if ((wc = *lxp->pat) >= '0' && wc <= '9') + { + num <<= 3; + num += wc - '0'; + if ((wc = *++lxp->pat) >= '0' && wc <= '9') + { + num <<= 3; + num += wc - '0'; + lxp->pat++; + } + else if (lxp->flags & REG_OLDESC) + { + lxp->pat--; + wc = lxp->pat[-1]; + goto out; + } + } + else if (lxp->flags & REG_OLDESC) + { + wc = lxp->pat[-1]; + goto out; + } + if ((wc = num) <= 0) + { + lxp->err = REG_BADESC; + return -1; + } + goto out; + case '0': + if ((lxp->flags & REG_NOBACKREF) == 0 + && (num = *lxp->pat) >= '0' && num <= '9') + { + num -= '0'; + /* + * This loop ignores wraparounds. + * Keep track of number of digits in n. + */ + n = 1; + while ((wc = *++lxp->pat) >= '0' && wc <= '9') + { + num *= 10; + num += wc - '0'; + n++; + } + if (num != 0) + goto backref; + lxp->pat -= n; + } + num = 0; + goto tryoctal; + case 'a': + if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ) + wc = '\a'; + goto out; + case 'b': + if (lxp->flags & REG_ESCSEQ) + wc = '\b'; + goto out; + case 'f': + if (lxp->flags & REG_ESCSEQ) + wc = '\f'; + goto out; + case 'n': + if (lxp->flags & (REG_ESCSEQ | REG_ESCNL)) + { + wc = '\n'; + if (lxp->flags & REG_NEWLINE) + lxp->flags |= REG_NFA; + } + goto out; + case 'r': + if (lxp->flags & REG_ESCSEQ) + wc = '\r'; + goto out; + case 't': + if (lxp->flags & REG_ESCSEQ) + wc = '\t'; + goto out; + case 'v': + if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ) + wc = '\v'; + goto out; + case 'x': + if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ + && isxdigit(num = *lxp->pat)) + { + wc = num; + num = 0; + /* + * Take as many hex digits as possible, + * ignoring overflows. + * If the result (squeezed into a w_type) + * is positive, it's okay. 
+ */ + do + { + if (isdigit(wc)) + wc -= '0'; + else if (isupper(wc)) + wc -= 'A' + 10; + else + wc -= 'a' + 10; + num <<= 4; + num |= wc; + } while (isxdigit(wc = *++lxp->pat)); + if ((wc = num) <= 0) + { + lxp->err = REG_BADESC; + return -1; + } + } + goto out; + } + /*FALLTHROUGH*/ + default: + if (!ISONEBYTE(wc)) + { + if ((n = libuxre_mb2wc(&wc, lxp->pat)) > 0) + lxp->pat += n; + else if (n < 0) + { + lxp->err = REG_ILLSEQ; + return -1; + } + } + if (lxp->flags & REG_ICASE) + wc = to_lower(wc); + break; + } +out:; + lxp->tok = wc; + return 0; +} + +static Tree *alt(Lex *); + +static Tree * +leaf(Lex *lxp) +{ + Tree *tp; + + if ((tp = malloc(sizeof(Tree))) == 0) + { + lxp->err = REG_ESPACE; + return 0; + } + switch (tp->op = lxp->tok) /* covers most cases */ + { + default: + if (tp->op < 0) + { + lxp->err = REG_BADPAT; + tp->right.ptr = 0; + goto badunary; + } + break; + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + if ((lxp->flags & REG_NOAUTOQUOTE) == 0 + && lxp->pat[-1] != '}') + { + tp->op = lxp->pat[-1]; + break; + } + /*FALLTHROUGH*/ + case ROP_BRACE: + case ROP_EMPTY: /* was {0,0} ROP_BRACE */ + case ROP_NOP: /* was {1,1} ROP_BRACE */ + lxp->err = REG_BADRPT; + badunary:; + tp->left.ptr = 0; + goto err; + case ROP_ANYCH: + case ROP_NOTNL: + break; + case ROP_BOL: + case ROP_EOL: + case ROP_LT: + case ROP_GT: + /* + * Look ahead for what would have been taken to be + * postfix operators. 
+ */ + if (lex(lxp) != 0) + goto err; + switch (lxp->tok) + { + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + if ((lxp->flags & REG_NOAUTOQUOTE) == 0 + && lxp->pat[-1] != '}') + { + lxp->tok = lxp->pat[-1]; + break; + } + /*FALLTHROUGH*/ + case ROP_BRACE: + case ROP_EMPTY: /* was {0,0} ROP_BRACE */ + case ROP_NOP: /* was {1,1} ROP_BRACE */ + lxp->err = REG_BADRPT; + goto err; + } + return tp; + case ROP_BKT: + tp->right.info.bkt = lxp->info.bkt; + break; + case ROP_REF: + tp->right.info.sub = lxp->info.sub; + break; + case ROP_LP: + tp->right.info.sub = lxp->nleft; + if (lex(lxp) != 0) + goto badunary; + if (lxp->tok == ROP_RP) /* empty parens; choice of meaning */ + { + if (lxp->flags & REG_MTPARENBAD) + { + lxp->err = REG_EMPTYPAREN; + goto badunary; + } + lxp->tok = ROP_EMPTY; + if (lxp->flags & REG_MTPARENFAIL) + lxp->tok = ROP_NONE; + if ((tp->left.ptr = libuxre_reg1tree(lxp->tok, 0)) == 0) + goto badunary; + } + else if ((tp->left.ptr = alt(lxp)) == 0) + { + if (lxp->err == REG_BADPAT) + goto parenerr; + goto badunary; + } + else if (lxp->tok != ROP_RP) + { + lxp->err = REG_BADPAT; + parenerr:; + if (lxp->nleft != lxp->nright) + lxp->err = REG_EPAREN; /* better choice */ + goto badunary; + } + tp->left.ptr->parent = tp; + break; + } + if (lex(lxp) != 0) + { + err:; + libuxre_regdeltree(tp, 1); + tp = 0; + } + return tp; +} + +static Tree * +post(Lex *lxp) +{ + Tree *lp; + + if ((lp = leaf(lxp)) == 0) + return 0; + switch (lxp->tok) + { + case ROP_EMPTY: /* this was {0,0} ROP_BRACE */ + libuxre_regdeltree(lp, 1); + lp = 0; + /*FALLTHROUGH*/ + case ROP_BRACE: + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + if ((lp = libuxre_reg1tree(lxp->tok, lp)) == 0) + { + lxp->err = REG_ESPACE; + return 0; + } + if (lxp->tok == ROP_BRACE) + lp->right.info = lxp->info; + /*FALLTHROUGH*/ + case ROP_NOP: /* this was {1,1} ROP_BRACE */ + if (lex(lxp) != 0) + { + libuxre_regdeltree(lp, 1); + return 0; + } + break; + } + return lp; +} + +static Tree * +cat(Lex *lxp) 
+{
+	Tree *seq, *next;
+
+	/*
+	 * A concatenation is one or more post() terms.  It ends at the
+	 * first '|', ')' or end-of-pattern token, which is left in
+	 * lxp->tok for the caller.
+	 */
+	seq = post(lxp);
+	if (seq == 0)
+		return 0;
+	while (lxp->tok != ROP_OR && lxp->tok != ROP_RP
+		&& lxp->tok != ROP_END)
+	{
+		next = post(lxp);
+		if (next == 0)
+		{
+			libuxre_regdeltree(seq, 1);
+			return 0;
+		}
+		seq = libuxre_reg2tree(ROP_CAT, seq, next);
+		if (seq == 0)
+		{
+			lxp->err = REG_ESPACE;
+			return 0;
+		}
+	}
+	return seq;
+}
+
+/*
+ * Parse an alternation: cat() terms separated by ROP_OR tokens,
+ * combined left to right into ROP_OR nodes.  A '|' that is directly
+ * followed by the end of the pattern is ignored.
+ *
+ * Returns the subtree, or 0 on failure; the tree built so far is
+ * deleted when lex() or a later cat() call fails.
+ */
+static Tree *
+alt(Lex *lxp)
+{
+	Tree *choice, *branch;
+
+	choice = cat(lxp);
+	if (choice == 0)
+		return 0;
+	while (lxp->tok == ROP_OR)
+	{
+		if (lex(lxp) != 0)
+			goto fail;
+		if (lxp->tok == ROP_END)
+			break;		/* ignore trailing '|' */
+		branch = cat(lxp);
+		if (branch == 0)
+			goto fail;
+		choice = libuxre_reg2tree(ROP_OR, choice, branch);
+		if (choice == 0)
+		{
+			lxp->err = REG_ESPACE;
+			return 0;
+		}
+	}
+	return choice;
+fail:
+	libuxre_regdeltree(choice, 1);
+	return 0;
+}
+
+/*
+ * Parse the pattern pat into a syntax tree.
+ *
+ * lxp is reset completely before use.  With REG_OR set in flags, one
+ * leading '|' in pat is skipped.  The finished expression is wrapped in
+ * a ROP_LP node numbered 0 when a back reference was seen
+ * (lxp->maxref != 0) or REG_NOSUB is clear, and is always followed by a
+ * ROP_END leaf joined with ROP_CAT.
+ *
+ * Returns the root, whose parent is 0, or 0 on failure.  On failure
+ * lxp->err is nonzero: REG_NOPAT for an empty pattern, whatever a
+ * callee stored, or REG_ESPACE when nothing was stored.  lxp->clist is
+ * passed to free() on every path.
+ */
+LIBUXRE_STATIC Tree *
+libuxre_regparse(Lex *lxp, const unsigned char *pat, int flags)
+{
+	Tree *root, *endp;
+
+	root = 0;	/* in case of error */
+	lxp->err = 0;
+	lxp->col = 0;
+	lxp->maxref = 0;
+	lxp->nleft = 0;
+	lxp->nright = 0;
+	lxp->clist = 0;
+	lxp->nclist = 0;
+	lxp->mb_cur_max = MB_CUR_MAX;
+	if ((flags & REG_OR) != 0 && *pat == '|')
+		pat++;	/* skip initial OR like egrep did */
+	lxp->pat = pat;
+	lxp->flags = flags;
+	lxp->tok = ROP_OR;	/* enables ^ as anchor */
+	/*
+	 * Get initial token.
+	 */
+	if (lex(lxp) != 0)
+		goto fail;
+	if (lxp->tok == ROP_END)
+	{
+		lxp->err = REG_NOPAT;
+		goto fail;
+	}
+	root = alt(lxp);	/* parse entire RE */
+	if (root == 0)
+		goto fail;
+	if (lxp->maxref != 0 || (flags & REG_NOSUB) == 0)
+	{
+		root = libuxre_reg1tree(ROP_LP, root);
+		if (root == 0)
+			goto fail;
+		root->right.info.sub = 0;
+	}
+	endp = libuxre_reg1tree(ROP_END, 0);
+	if (endp == 0)
+		goto fail;
+	root = libuxre_reg2tree(ROP_CAT, root, endp);
+	if (root == 0)
+		goto fail;
+	root->parent = 0;
+	goto done;
+fail:
+	if (root != 0)
+	{
+		libuxre_regdeltree(root, 1);
+		root = 0;
+	}
+	if (lxp->err == 0)
+		lxp->err = REG_ESPACE;
+done:
+	if (lxp->clist != 0)
+		free(lxp->clist);
+	return root;
+}
+
+#ifdef REGDEBUG
+
+LIBUXRE_STATIC void
+libuxre_regtree(Tree *tp, int n)
+{
+	const char *opstr;
+	char buf[32];
+	int kind, next;
+
+	if (n < 0)
+		next = -n + 2;
+	else
+		next = n + 2;
+	switch (tp->op)
+	{
+	case ROP_OR:
+		opstr = "|";
+		kind = BINARY_ROP;
+		break;
+	case ROP_CAT:
+		opstr = "&";
+		kind = BINARY_ROP;
+		break;
+	case ROP_STAR:
+		opstr = "*";
+		kind = UNARY_ROP;
+		break;
+	case ROP_PLUS:
+		opstr = "+";
+		kind = UNARY_ROP;
+		break;
+	case ROP_QUEST:
+		opstr = "?";
+		kind = UNARY_ROP;
+		break;
+	case ROP_BRACE:
+		opstr = buf;
+		if (tp->right.info.num[1] == BRACE_INF)
+		{
+			sprintf(buf, "{%u,inf}",
+				(unsigned)tp->right.info.num[0]);
+		}
+		else
+		{
+			sprintf(buf, "{%u,%u}",
+				(unsigned)tp->right.info.num[0],
+				(unsigned)tp->right.info.num[1]);
+		}
+		kind = UNARY_ROP;
+		break;
+	case ROP_LP:
+		opstr = buf;
+		sprintf(buf, "%lu(", (unsigned long)tp->right.info.sub);
+		kind = UNARY_ROP;
+		break;
+	case ROP_RP:
+		opstr = buf;
+		sprintf(buf, ")%lu", (unsigned long)tp->right.info.sub);
+		kind = UNARY_ROP;
+		break;
+	case ROP_NOP:
+		opstr = "";
+		kind = LEAF_ROP;
+		break;
+	case ROP_BOL:
+		opstr = "";
+		kind = LEAF_ROP;
+		break;
+	case ROP_EOL:
+		opstr = "";
+		kind = LEAF_ROP;
+		break;
+	case ROP_ALL:
+		opstr = "";
+		kind = LEAF_ROP;
+		break;
+	case ROP_ANYCH:
+		opstr = "";
+		kind = LEAF_ROP;
+		break;
+	case ROP_NOTNL:
+		opstr = "";
+		kind = LEAF_ROP;
+		break;
+	case ROP_EMPTY:
+		opstr = "";
+		kind = LEAF_ROP;
+		break;
+	case ROP_NONE:
+		opstr = "";
+		kind = LEAF_ROP;
+		break;
+	case ROP_BKT:
+		opstr = buf;
+		sprintf(buf, "[%#lx]", (unsigned long)tp->right.info.bkt);
+		kind = LEAF_ROP;
+		break;
+	case ROP_BKTCOPY:
+		opstr = buf;
+		sprintf(buf, "[%#lx]CPY", (unsigned long)tp->right.info.bkt);
+		kind = LEAF_ROP;
+		break;
+	case ROP_LT:
+		opstr = "\\<";
+		kind = LEAF_ROP;
+		break;
+	case ROP_GT:
+		opstr = "\\>";
+		kind = LEAF_ROP;
+		break;
+	case ROP_REF:
+		opstr = buf;
+		sprintf(buf, "\\%lu", (unsigned long)tp->right.info.sub);
+		kind = LEAF_ROP;
+		break;
+	case ROP_END:
+		opstr = "";
+		kind = LEAF_ROP;
+		break;
+	default:
+		/*
+		 * Not a named operator: values above UCHAR_MAX are shown
+		 * in hex with a 'W' prefix, values <= 0 as "UNK=", and
+		 * everything else as the character itself.
+		 */
+		opstr = buf;
+		if (tp->op > UCHAR_MAX)
+			sprintf(buf, "W%#x", tp->op);
+		else if (tp->op <= 0)
+			sprintf(buf, "UNK=%u", tp->op);
+		else
+			sprintf(buf, "%c", tp->op);
+		kind = LEAF_ROP;
+		break;
+	}
+	/*
+	 * In-order dump: the right operand (binary nodes only), then this
+	 * node, then the left or only operand.  The sign of the depth
+	 * handed down records the side a child hangs on, which selects
+	 * the 'R' (n < 0), 'L' (n > 0) or 'T' (n == 0) tag; the tag is
+	 * right-justified in a field of next - 1 columns.
+	 */
+	if (kind == BINARY_ROP)
+		libuxre_regtree(tp->right.ptr, -next);
+	printf("%*c:%s\n", next - 1, n < 0 ? 'R' : n > 0 ? 'L' : 'T', opstr);
+	if (kind != LEAF_ROP)
+		libuxre_regtree(tp->left.ptr, next);
+}
+
+#endif /*REGDEBUG*/
diff --git a/libuxre/stubs.c b/libuxre/stubs.c
new file mode 100644
index 0000000..bd670db
--- /dev/null
+++ b/libuxre/stubs.c
@@ -0,0 +1,97 @@
+/*
+ * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
+ *
+ * Sccsid @(#)stubs.c 1.24 (gritter) 10/12/04
+ */
+/* UNIX(R) Regular Expression Library
+ *
+ * Note: Code is released under the GNU LGPL
+ *
+ * Copyright (C) 2001 Caldera International, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+/* stubbed-out routines needed to complete the RE libc code */
+
+#include "colldata.h"
+
+/*
+ * Stub for the collation lookup: the argument is ignored and the
+ * address of one zero-initialised static structure is returned on
+ * every call.
+ */
+struct lc_collate *
+libuxre_lc_collate(struct lc_collate *cp)
+{
+	static struct lc_collate curinfo = {0}; /* means CHF_ENCODED */
+
+	return &curinfo;
+}
+
+#include "wcharm.h"
+
+/*
+ * Convert the multibyte character whose first byte is s[-1] (s points
+ * just past that byte) and store the result in *wt.
+ *
+ * Returns the number of bytes used beyond that first byte when a
+ * character was converted, 0 with *wt = '\0' when mbtowc() reports the
+ * null character, or mbtowc()'s negative result with *wt = WEOF when
+ * the sequence is invalid.
+ */
+LIBUXRE_STATIC int
+libuxre_mb2wc(w_type *wt, const unsigned char *s)
+{
+	wchar_t wc;
+	int len;
+
+	if ((len = mbtowc(&wc, (const char *)&s[-1], MB_LEN_MAX)) > 0)
+		*wt = wc;
+	else if (len == 0)
+		*wt = '\0';
+	else /*if (len < 0)*/
+		*wt = (w_type)WEOF;
+	return len > 0 ? len - 1 : len;
+}
+
+/*
+ * Keep the identification string in the object file.  The 'used'
+ * attribute is selected for GCC 3.4 and every later release; comparing
+ * major and minor independently would reject releases such as 4.2 or
+ * 5.1 and hand them 'unused', which only silences the warning.
+ */
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#define USED __attribute__ ((used))
+#elif defined __GNUC__
+#define USED __attribute__ ((unused))
+#else
+#define USED
+#endif
+static const char sccsid[] USED = "@(#)libuxre.sl 1.24 (gritter) 10/12/04";
+/*
+_collelem.c:
+ _collelem.c 1.4 (gritter) 10/18/03
+_collmult.c:
+ _collmult.c 1.4 (gritter) 9/22/03
+bracket.c:
+ bracket.c 1.14 (gritter) 10/18/03
+colldata.h:
+ colldata.h 1.4 (gritter) 10/18/03
+onefile.c:
+ onefile.c 1.1 (gritter) 9/22/03
+re.h:
+ re.h 1.14 (gritter) 10/18/03
+regcomp.c:
+ regcomp.c 1.6 (gritter) 9/22/03
+regdfa.c:
+ regdfa.c 1.9 (gritter) 9/22/03
+regdfa.h:
+ regdfa.h 1.3 (gritter) 9/22/03
+regerror.c:
+ regerror.c 1.4 (gritter) 3/29/03
+regex.h:
+ regex.h 1.12 (gritter) 9/22/03
+regexec.c:
+ regexec.c 1.6 (gritter) 9/22/03
+regfree.c:
+ regfree.c 1.3 (gritter) 9/22/03
+regnfa.c:
+ regnfa.c 1.7 (gritter) 9/22/03
+regparse.c:
+ regparse.c 1.12 (gritter) 9/22/03
+wcharm.h:
+ wcharm.h 1.12 (gritter) 10/18/03
+*/
diff --git a/libuxre/wcharm.h b/libuxre/wcharm.h
new file mode 100644
index 0000000..8985d6b
--- /dev/null
+++ b/libuxre/wcharm.h
@@ -0,0 +1,63 @@
+/*
+ * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
+ *
+ * Sccsid @(#)wcharm.h 1.12 (gritter) 10/18/03
+ */
+/* UNIX(R) Regular Expression Library
+ *
+ * Note: Code is released under the GNU LGPL
+ *
+ * Copyright (C) 2001 Caldera International, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* Stubbed-out wide character locale information */ + +#ifndef LIBUXRE_WCHARM_H +#define LIBUXRE_WCHARM_H + +#ifndef LIBUXRE_STATIC +#define LIBUXRE_STATIC +#endif + +#ifndef LIBUXRE_WUCHAR_T +#define LIBUXRE_WUCHAR_T +typedef unsigned int wuchar_type; +#endif + +#ifndef LIBUXRE_W_TYPE +#define LIBUXRE_W_TYPE +typedef int w_type; +#endif + +#include +#include +#include + +#ifdef notdef +#define ISONEBYTE(ch) ((ch), 1) + +#define libuxre_mb2wc(wp, cp) ((wp), (cp), 0) +#endif /* notdef */ + +#define ISONEBYTE(ch) (((ch) & 0200) == 0 || mb_cur_max == 1) + +#define to_lower(ch) (mb_cur_max > 1 ? towlower(ch) : tolower(ch)) +#define to_upper(ch) (mb_cur_max > 1 ? towupper(ch) : toupper(ch)) + +LIBUXRE_STATIC int libuxre_mb2wc(w_type *, const unsigned char *); + +#endif /* !LIBUXRE_WCHARM_H */ diff --git a/mapmalloc.c b/mapmalloc.c index 5869ca3..017b992 100644 --- a/mapmalloc.c +++ b/mapmalloc.c @@ -1,7 +1,8 @@ /* * AT&T Unix 7th Edition memory allocation routines. * - * Modified by Gunnar Ritter, Freiburg i. Br., Germany, February 2005. + * Modified for ex by Gunnar Ritter, Freiburg i. Br., Germany, + * February 2005. * * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. * @@ -35,7 +36,7 @@ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * Sccsid @(#)mapmalloc.c 1.7 (gritter) 8/18/05 + * Sccsid @(#)mapmalloc.c 1.4 (gritter) 2/20/05 */ #ifdef VMUNIX @@ -44,7 +45,6 @@ #include #include #include -#include #ifndef MAP_FAILED #define MAP_FAILED ((void *)-1) @@ -77,6 +77,7 @@ #include #include #include +#include int botch(char *s) { @@ -107,13 +108,6 @@ void dump(const char *msg, uintptr_t t) #define dump(a, b) #endif -#ifdef valgrind -#include -#else /* !valgrind */ -#define VALGRIND_MALLOCLIKE_BLOCK(a, b, c, d) -#define VALGRIND_FREELIKE_BLOCK(a, b) -#endif /* !valgrind */ - /* avoid break bug */ #ifdef pdp11 #define GRANULE 64 @@ -200,8 +194,8 @@ map(void *addr, size_t len) return(mmap(addr,len,PROT_READ|PROT_WRITE,flags,fd,0)); } -static void * -mallock(size_t nbytes, union store *start, union store *end) +void * +malloc(size_t nbytes) { register union store *p, *q; struct pool *o; @@ -239,9 +233,7 @@ first: if(allocs[0].ptr==0) { /*first time for this pool*/ if (ua) allocp = p->ptr; } - if(q>=p+nw && p+nw>=p && (start==NULL || - p+nwend || - p+2==start)) + if(q>=p+nw && p+nw>=p) goto found; } q = p; @@ -300,16 +292,9 @@ found: p->ptr = setbusy(allocp); p[1].pool = o; dump("malloc", (uintptr_t)(p + 2)); - VALGRIND_MALLOCLIKE_BLOCK(p+2,nbytes,0,0); return(p+2); } -void * -malloc(size_t nbytes) -{ - return mallock(nbytes, NULL, NULL); -} - /* freeing strategy tuned for LIFO allocation */ void @@ -328,7 +313,6 @@ free(register void *ap) ASSERT(testbusy(p->ptr)); p->ptr = clearbusy(p->ptr); ASSERT(p->ptr > allocp && p->ptr <= alloct); - VALGRIND_FREELIKE_BLOCK(ap,0); } /* realloc(p, nbytes) reallocates a block obtained from malloc() @@ -357,7 +341,7 @@ realloc(void *ap, size_t nbytes) free(p); onw = p[-2].ptr - p; o = p[-1].pool; - q = mallock(nbytes, p, &p[onw]); + q = malloc(nbytes); if(q==NULL || q==p) return(q); s = p; diff --git a/regexp.h b/regexp.h new file mode 100644 index 0000000..fad1c74 --- /dev/null +++ b/regexp.h @@ -0,0 +1,1210 @@ +/* + * Simple Regular Expression functions. 
Derived from Unix 7th Edition, + * /usr/src/cmd/expr.y + * + * Modified by Gunnar Ritter, Freiburg i. Br., Germany, February 2002. + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+/*
+ * Keep the identification string in the object file.  'used' is
+ * selected for GCC 3.4 and every later release; testing major and
+ * minor independently would reject e.g. 4.2 or 5.1.
+ */
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#define REGEXP_H_USED __attribute__ ((used))
+#elif defined __GNUC__
+#define REGEXP_H_USED __attribute__ ((unused))
+#else
+#define REGEXP_H_USED
+#endif
+static const char regexp_h_sccsid[] REGEXP_H_USED =
+	"@(#)regexp.sl 1.54 (gritter) 2/19/05";
+
+/* Multibyte support is compiled in unless built for vi or dietlibc. */
+#if !defined (REGEXP_H_USED_FROM_VI) && !defined (__dietlibc__)
+#define REGEXP_H_WCHARS
+#endif
+
+/*
+ * Opcodes of the compiled expression.  STAR, RNGE, REGEXP_H_LEAST and
+ * CMB are OR'ed into an opcode as modifier bits.
+ */
+#define CBRA 2
+#define CCHR 4
+#define CDOT 8
+#define CCL 12
+/* CLNUM 14 used in sed */
+/* CEND 16 used in sed */
+#define CDOL 20
+#define CCEOF 22
+#define CKET 24
+#define CBACK 36
+#define CNCL 40
+#define CBRC 44
+#define CLET 48
+#define CCH1 52
+#define CCH2 56
+#define CCH3 60
+
+#define STAR 01
+#define RNGE 03
+#define REGEXP_H_LEAST 0100
+
+#ifdef REGEXP_H_WCHARS
+#define CMB 0200
+#else /* !REGEXP_H_WCHARS */
+#define CMB 0
+#endif /* !REGEXP_H_WCHARS */
+
+#define NBRA 9
+
+/*
+ * Set / test the bit for character c in the bitmap at ep.  The
+ * argument is parenthesised so that an expression can be passed.
+ */
+#define PLACE(c) ep[(c) >> 3] |= bittab[(c) & 07]
+#define ISTHERE(c) (ep[(c) >> 3] & bittab[(c) & 07])
+
+#ifdef REGEXP_H_WCHARS
+#define REGEXP_H_IS_THERE(ep, c) ((ep)[(c) >> 3] & bittab[(c) & 07])
+#endif
+
+#include <ctype.h>
+#include <string.h>
+#include <limits.h>
+#ifdef REGEXP_H_WCHARS
+#include <stdlib.h>
+#include <wchar.h>
+#include <wctype.h>
+#endif /* REGEXP_H_WCHARS */
+
+#define regexp_h_uletter(c) (isalpha(c) || (c) == '_')
+#ifdef REGEXP_H_WCHARS
+#define regexp_h_wuletter(c) (iswalpha(c) || (c) == L'_')
+
+/*
+ * Used to allocate memory for the multibyte star algorithm.
+ */
+#ifndef regexp_h_malloc
+#define regexp_h_malloc(n) malloc(n)
+#endif
+#ifndef regexp_h_free
+#define regexp_h_free(p) free(p)
+#endif
+
+/*
+ * Can be predefined to 'inline' to inline some multibyte functions;
+ * may improve performance for files that contain many multibyte
+ * sequences.
+ */
+#ifndef regexp_h_inline
+#define regexp_h_inline
+#endif
+
+/*
+ * Mask to determine whether the first byte of a sequence possibly
+ * starts a multibyte character. Set to 0377 to force mbtowc() for
+ * any byte sequence (except 0).
+ */
+#ifndef REGEXP_H_MASK
+#define REGEXP_H_MASK 0200
+#endif
+#endif /* REGEXP_H_WCHARS */
+
+/*
+ * For regexpr.h.
+ */
+#ifndef regexp_h_static
+#define regexp_h_static
+#endif
+#ifndef REGEXP_H_STEP_INIT
+#define REGEXP_H_STEP_INIT
+#endif
+#ifndef REGEXP_H_ADVANCE_INIT
+#define REGEXP_H_ADVANCE_INIT
+#endif
+
+/*
+ * Matcher state with external linkage.
+ * braslist/braelist: per-group start/end of matched text, stored by
+ *	the CBRA/CKET opcodes in regexp_h_advance().
+ * nbra: number of \( groups opened, counted by compile().
+ * loc1: start of the match, set by step(); loc2: end of the match, set
+ *	when regexp_h_advance() reaches CCEOF.
+ * locs: only read in this header (assumed to be set by the including
+ *	program -- TODO confirm).
+ * sed: nonzero selects the sed flavour of compile()'s empty-pattern and
+ *	newline handling.
+ * nodelim: set to 1 by compile() when the pattern ended at a newline.
+ */
+char *braslist[NBRA];
+char *braelist[NBRA];
+int nbra;
+char *loc1, *loc2, *locs;
+int sed;
+int nodelim;
+
+regexp_h_static int circf;	/* set by compile() for a leading '^' */
+regexp_h_static int low;	/* interval bounds used by the RNGE opcodes; */
+regexp_h_static int size;	/* presumably filled in by regexp_h_getrnge() */
+
+/* bittab[i] is the mask with only bit i set (see PLACE/ISTHERE). */
+regexp_h_static unsigned char bittab[] = {
+	1,
+	2,
+	4,
+	8,
+	16,
+	32,
+	64,
+	128
+};
+static int regexp_h_advance(register const char *lp,
+		register const char *ep);
+static void regexp_h_getrnge(register const char *str, int least);
+
+static const char *regexp_h_bol; /* beginning of input line (for \<) */
+
+#ifdef REGEXP_H_WCHARS
+static int regexp_h_wchars;	/* CMB when MB_CUR_MAX > 1, else 0 */
+static int regexp_h_mbcurmax;	/* MB_CUR_MAX sampled by compile() */
+
+static const char *regexp_h_firstwc; /* location of first
+					multibyte character
+					on input line */
+
+/*
+ * Read the next pattern character into c.  In multibyte mode bytes
+ * are taken from GETC() one at a time until mbtowc() accepts the
+ * sequence, the eof byte is read, or MB_LEN_MAX bytes were collected;
+ * ERROR(67) is raised if the sequence is still invalid.  Relies on
+ * GETC(), ERROR() and a variable named eof from the expansion site.
+ */
+#define regexp_h_getwc(c) { \
+	if (regexp_h_wchars) { \
+		char mbbuf[MB_LEN_MAX + 1], *mbptr; \
+		wchar_t wcbuf; \
+		int mb, len; \
+		mbptr = mbbuf; \
+		do { \
+			mb = GETC(); \
+			*mbptr++ = mb; \
+			*mbptr = '\0'; \
+		} while ((len = mbtowc(&wcbuf, mbbuf, regexp_h_mbcurmax)) < 0 \
+			&& mb != eof && mbptr < mbbuf + MB_LEN_MAX); \
+		if (len == -1) \
+			ERROR(67); \
+		c = wcbuf; \
+	} else { \
+		c = GETC(); \
+	} \
+}
+
+/*
+ * Append wc in multibyte form at mb and advance mb past it.  me is
+ * the end of the buffer: when at most regexp_h_mbcurmax bytes remain,
+ * the encoding is measured first and ERROR(50) is raised unless it is
+ * shorter than the remaining space.  ERROR(67) is raised for WEOF or
+ * when wctomb() fails; a zero result still advances mb by one.
+ */
+#define regexp_h_store(wc, mb, me) { \
+	int len; \
+	if (wc == WEOF) \
+		ERROR(67); \
+	if ((len = me - mb) <= regexp_h_mbcurmax) { \
+		char mt[MB_LEN_MAX]; \
+		if (wctomb(mt, wc) >= len) \
+			ERROR(50); \
+	} \
+	switch (len = wctomb(mb, wc)) { \
+	case -1: \
+		ERROR(67); \
+	case 0: \
+		mb++; \
+		break; \
+	default: \
+		mb += len; \
+	} \
+}
+
+/*
+ * Decode the multibyte character at *mb and step *mb past it.  An
+ * invalid sequence skips one byte and yields WEOF.
+ */
+static regexp_h_inline wint_t
+regexp_h_fetchwc(const char **mb, int islp)
+{
+	wchar_t wc;
+	int len;
+
+	if ((len = mbtowc(&wc, *mb, regexp_h_mbcurmax)) < 0) {
+		(*mb)++;
+		return WEOF;
+	}
+	if (islp &&
regexp_h_firstwc == NULL)
+		regexp_h_firstwc = *mb;	/* remember first decoded position */
+	/*if (len == 0) {
+		(*mb)++;
+		return L'\0';
+	} handled in singlebyte code */
+	*mb += len;
+	return wc;
+}
+
+/*
+ * Fetch the next character at mb and advance mb.  Bytes without any
+ * REGEXP_H_MASK bit are taken directly; everything else goes through
+ * regexp_h_fetchwc().  mb must be a modifiable lvalue.
+ */
+#define regexp_h_fetch(mb, islp) ((*(mb) & REGEXP_H_MASK) == 0 ? \
+				(*(mb)++&0377): \
+				regexp_h_fetchwc(&(mb), islp))
+
+/*
+ * Decode the multibyte character at mb without advancing; WEOF when
+ * mbtowc() rejects the sequence.
+ */
+static regexp_h_inline wint_t
+regexp_h_showwc(const char *mb)
+{
+	wchar_t wc;
+
+	if (mbtowc(&wc, mb, regexp_h_mbcurmax) < 0)
+		return WEOF;
+	return wc;
+}
+
+/* Peek at the character at mb; same byte shortcut as regexp_h_fetch. */
+#define regexp_h_show(mb) ((*(mb) & REGEXP_H_MASK) == 0 ? (*(mb)&0377): \
+				regexp_h_showwc(mb))
+
+/*
+ * Return the character immediately preceding mb. Since no byte is
+ * required to be the first byte of a character, the longest multibyte
+ * character ending at &[mb-1] is searched.
+ */
+static regexp_h_inline wint_t
+regexp_h_previous(const char *mb)
+{
+	const char *p = mb;
+	wchar_t wc, lastwc = WEOF;
+	int len, max = 0;
+
+	/*
+	 * Nothing multibyte was decoded at or before mb: the previous
+	 * byte is the previous character (WEOF at beginning of line).
+	 */
+	if (regexp_h_firstwc == NULL || mb <= regexp_h_firstwc)
+		return (mb > regexp_h_bol ? (mb[-1] & 0377) : WEOF);
+	/* Try ever longer candidates that end exactly at mb. */
+	while (p-- > regexp_h_bol) {
+		mbtowc(NULL, NULL, 0);	/* reset the conversion state */
+		if ((len = mbtowc(&wc, p, mb - p)) >= 0) {
+			/* stop once a candidate does not reach up to mb */
+			if (len < max || len < mb - p)
+				break;
+			max = len;
+			lastwc = wc;
+		} else if (len < 0 && max > 0)
+			break;
+	}
+	return lastwc;
+}
+
+/*
+ * Test character c against the compiled multibyte bracket expression
+ * at set; af is the value to produce on a hit.  0 and WEOF never
+ * match, characters up to 0177 are looked up in the bitmap that starts
+ * at set + 1, all others are searched by regexp_h_cclass_wc().
+ */
+#define regexp_h_cclass(set, c, af) \
+	((c) == 0 || (c) == WEOF ? 0 : ( \
+		((c) > 0177) ? \
+			regexp_h_cclass_wc(set, c, af) : ( \
+				REGEXP_H_IS_THERE((set)+1, (c)) ?
(af) : !(af) \
+			) \
+		) \
+	)
+
+/*
+ * Search the character list of a compiled multibyte bracket
+ * expression for c.
+ *
+ * set points at the length byte; a 16-byte bitmap follows at set[1]
+ * and the list of set[0] bytes starts at set[17].  A '-' that follows
+ * another list entry introduces the upper end of a range.
+ *
+ * Returns af when c equals a list entry or lies inside a range, and
+ * !af otherwise.
+ */
+static regexp_h_inline int
+regexp_h_cclass_wc(const char *set, register wint_t c, int af)
+{
+	register wint_t wc, wl = WEOF;
+	const char *end;
+
+	/*
+	 * The length byte holds 0..255 and must be read unsigned, as the
+	 * other readers of this byte do with '& 0377'; with a signed
+	 * plain char a length above 127 would place 'end' before the list.
+	 */
+	end = &set[17] + (set[0] & 0377);
+	set += 17;
+	while (set < end) {
+		wc = regexp_h_fetch(set, 0);
+#ifdef REGEXP_H_VI_BACKSLASH
+		if (wc == '\\' && set < end &&
+				(*set == ']' || *set == '-' ||
+				 *set == '^' || *set == '\\')) {
+			wc = regexp_h_fetch(set, 0);
+		} else
+#endif /* REGEXP_H_VI_BACKSLASH */
+		if (wc == '-' && wl != WEOF && set < end) {
+			wc = regexp_h_fetch(set, 0);
+#ifdef REGEXP_H_VI_BACKSLASH
+			if (wc == '\\' && set < end &&
+					(*set == ']' || *set == '-' ||
+					 *set == '^' || *set == '\\')) {
+				wc = regexp_h_fetch(set, 0);
+			}
+#endif /* REGEXP_H_VI_BACKSLASH */
+			/* strictly inside wl..wc; the ends are compared
+			   with '==' one iteration earlier and below */
+			if (c > wl && c < wc)
+				return af;
+		}
+		if (c == wc)
+			return af;
+		wl = wc;
+	}
+	return !af;
+}
+#else /* !REGEXP_H_WCHARS */
+#define regexp_h_wchars 0
+#define regexp_h_getwc(c) { c = GETC(); }
+#endif /* !REGEXP_H_WCHARS */
+
+regexp_h_static char *
+compile(char *instring, char *ep, const char *endbuf, int seof)
+{
+	INIT /* Dependent declarations and initializations */
+	register int c;
+	register int eof = seof;
+	char *lastep = instring;
+	int cclcnt;
+	char bracket[NBRA], *bracketp;
+	int closed;
+	char neg;
+	int lc;
+	int i, cflg;
+
+#ifdef REGEXP_H_WCHARS
+	char *eq;
+	regexp_h_mbcurmax = MB_CUR_MAX;
+	regexp_h_wchars = regexp_h_mbcurmax > 1 ?
CMB : 0;
+#endif
+	lastep = 0;
+	bracketp = bracket;
+	/*
+	 * Empty pattern (delimiter or newline first): nothing is
+	 * compiled and ep is returned unchanged.  Outside sed this is an
+	 * error when the buffer at ep holds no earlier expression.
+	 */
+	if((c = GETC()) == eof || c == '\n') {
+		if (c == '\n') {
+			UNGETC(c);
+			nodelim = 1;
+		}
+		if(*ep == 0 && !sed)
+			ERROR(41);
+		if (bracketp > bracket)
+			ERROR(42);
+		RETURN(ep);
+	}
+	circf = closed = nbra = 0;
+	if (c == '^')
+		circf++;
+	else
+		UNGETC(c);
+	for (;;) {
+		if (ep >= endbuf)
+			ERROR(50);
+		regexp_h_getwc(c);
+		/*
+		 * lastep tracks the opcode a following '*' or \{ applies
+		 * to, so it must not move when that operator itself is
+		 * being read.
+		 */
+		if(c != '*' && ((c != '\\') || (PEEKC() != '{')))
+			lastep = ep;
+		if (c == eof) {
+			*ep++ = CCEOF;
+			if (bracketp > bracket)
+				ERROR(42);
+			RETURN(ep);
+		}
+		switch (c) {
+
+		case '.':
+			*ep++ = CDOT|regexp_h_wchars;
+			continue;
+
+		case '\n':
+			if (sed == 0) {
+				UNGETC(c);
+				*ep++ = CCEOF;
+				nodelim = 1;
+				RETURN(ep);
+			}
+			ERROR(36);
+		case '*':
+			/* literal when there is nothing it could repeat */
+			if (lastep==0 || *lastep==CBRA || *lastep==CKET ||
+					*lastep==(CBRC|regexp_h_wchars) ||
+					*lastep==(CLET|regexp_h_wchars))
+				goto defchar;
+			*lastep |= STAR;
+			continue;
+
+		case '$':
+			/* an anchor only directly before the delimiter */
+			if(PEEKC() != eof)
+				goto defchar;
+			*ep++ = CDOL;
+			continue;
+
+		case '[':
+#ifdef REGEXP_H_WCHARS
+			if (regexp_h_wchars == 0) {
+#endif
+			/* single-byte class: CCL followed by a 32-byte bitmap */
+			if(&ep[33] >= endbuf)
+				ERROR(50);
+
+			*ep++ = CCL;
+			lc = 0;
+			for(i = 0; i < 32; i++)
+				ep[i] = 0;
+
+			neg = 0;
+			if((c = GETC()) == '^') {
+				neg = 1;
+				c = GETC();
+			}
+
+			do {
+				c &= 0377;
+				if(c == '\0' || c == '\n')
+					ERROR(49);
+#ifdef REGEXP_H_VI_BACKSLASH
+				if(c == '\\' && ((c = PEEKC()) == ']' ||
+						c == '-' || c == '^' ||
+						c == '\\')) {
+					c = GETC();
+					c &= 0377;
+				} else
+#endif /* REGEXP_H_VI_BACKSLASH */
+				if(c == '-' && lc != 0) {
+					if ((c = GETC()) == ']') {
+						PLACE('-');
+						break;
+					}
+#ifdef REGEXP_H_VI_BACKSLASH
+					if(c == '\\' &&
+						((c = PEEKC()) == ']' ||
+							c == '-' ||
+							c == '^' ||
+							c == '\\'))
+						c = GETC();
+#endif /* REGEXP_H_VI_BACKSLASH */
+					c &= 0377;
+					while(lc < c) {
+						PLACE(lc);
+						lc++;
+					}
+				}
+				lc = c;
+				PLACE(c);
+			} while((c = GETC()) != ']');
+			if(neg) {
+				/* invert the map; NUL (bit 0) never matches */
+				for(cclcnt = 0; cclcnt < 32; cclcnt++)
+					ep[cclcnt] ^= 0377;
+				ep[0] &= 0376;
+			}
+
+			ep += 32;
+#ifdef REGEXP_H_WCHARS
+			} else {
+				/*
+				 * Multibyte class: opcode, length byte,
+				 * 16-byte bitmap for characters up to 0177,
+				 * then the bracket contents as written.
+				 */
+				if (&ep[18] >= endbuf)
+					ERROR(50);
+				*ep++ = CCL|CMB;
+				*ep++ = 0;
+				lc = 0;
+				for (i = 0; i < 16; i++)
+					ep[i] = 0;
+				eq = &ep[16];
+				regexp_h_getwc(c);
+				if (c == L'^') {
+					regexp_h_getwc(c);
+					ep[-2] = CNCL|CMB;
+				}
+				do {
+					if (c == '\0' || c == '\n')
+						ERROR(49);
+#ifdef REGEXP_H_VI_BACKSLASH
+					if(c == '\\' && ((c = PEEKC()) == ']' ||
+							c == '-' || c == '^' ||
+							c == '\\')) {
+						regexp_h_store(c, eq, endbuf);
+						regexp_h_getwc(c);
+					} else
+#endif /* REGEXP_H_VI_BACKSLASH */
+					if (c == '-' && lc != 0 && lc <= 0177) {
+						regexp_h_store(c, eq, endbuf);
+						regexp_h_getwc(c);
+						if (c == ']') {
+							PLACE('-');
+							break;
+						}
+#ifdef REGEXP_H_VI_BACKSLASH
+						if(c == '\\' &&
+							((c = PEEKC()) == ']' ||
+								c == '-' ||
+								c == '^' ||
+								c == '\\')) {
+							regexp_h_store(c, eq,
+								endbuf);
+							regexp_h_getwc(c);
+						}
+#endif /* REGEXP_H_VI_BACKSLASH */
+						while (lc < (c & 0177)) {
+							PLACE(lc);
+							lc++;
+						}
+					}
+					lc = c;
+					if (c <= 0177)
+						PLACE(c);
+					regexp_h_store(c, eq, endbuf);
+					regexp_h_getwc(c);
+				} while (c != L']');
+				/* the list length has to fit the length byte */
+				if ((i = eq - &ep[16]) > 255)
+					ERROR(50);
+				lastep[1] = i;
+				ep = eq;
+			}
+#endif /* REGEXP_H_WCHARS */
+
+			continue;
+
+		case '\\':
+			regexp_h_getwc(c);
+			switch(c) {
+
+			case '(':
+				if(nbra >= NBRA)
+					ERROR(43);
+				*bracketp++ = nbra;
+				*ep++ = CBRA;
+				*ep++ = nbra++;
+				continue;
+
+			case ')':
+				if(bracketp <= bracket)
+					ERROR(42);
+				*ep++ = CKET;
+				*ep++ = *--bracketp;
+				closed++;
+				continue;
+
+			case '<':
+				*ep++ = CBRC|regexp_h_wchars;
+				continue;
+
+			case '>':
+				*ep++ = CLET|regexp_h_wchars;
+				continue;
+
+			case '{':
+				/*
+				 * Interval \{m\}, \{m,\} or \{m,n\}: two
+				 * bytes are appended, with 255 standing for
+				 * an open upper bound.
+				 */
+				if(lastep == (char *) (0))
+					goto defchar;
+				*lastep |= RNGE;
+				cflg = 0;
+			nlim:
+				c = GETC();
+				i = 0;
+				do {
+					if ('0' <= c && c <= '9')
+						i = 10 * i + c - '0';
+					else
+						ERROR(16);
+				} while(((c = GETC()) != '\\') && (c != ','));
+				if (i > 255)
+					ERROR(11);
+				*ep++ = i;
+				if (c == ',') {
+					if(cflg++)
+						ERROR(44);
+					if((c = GETC()) == '\\') {
+						*ep++ = (char)255;
+						*lastep |= REGEXP_H_LEAST;
+					} else {
+						UNGETC(c);
+						goto nlim; /* get 2'nd number */
+					}
+				}
+				if(GETC() != '}')
+					ERROR(45);
+				if(!cflg) /* one number */
+					*ep++ = i;
+				else if((ep[-1] & 0377) < (ep[-2] & 0377))
+					ERROR(46);
+				continue;
+
+			case '\n':
+				ERROR(36);
+
+			case 'n':
+				c = '\n';
+				goto defchar;
+
+			default:
+				/* \1..\9 must name a group already closed */
+				if(c >= '1' && c <= '9') {
+					if((c -= '1') >= closed)
+						ERROR(25);
+					*ep++ = CBACK;
+					*ep++ = c;
+					continue;
+				}
+			}
+	/* Drop through to default to use \ to turn off special chars */
+
+		defchar:
+		default:
+			lastep = ep;
+#ifdef REGEXP_H_WCHARS
+			if (regexp_h_wchars == 0) {
+#endif
+			*ep++ = CCHR;
+			*ep++ = c;
+#ifdef REGEXP_H_WCHARS
+			} else {
+				char mbbuf[MB_LEN_MAX];
+
+				/*
+				 * The opcode encodes the byte length of the
+				 * literal for lengths 1 to 3; any other
+				 * result gets the generic CCHR|CMB.
+				 */
+				switch (wctomb(mbbuf, c)) {
+				case 1: *ep++ = CCH1;
+					break;
+				case 2: *ep++ = CCH2;
+					break;
+				case 3: *ep++ = CCH3;
+					break;
+				default:
+					*ep++ = CCHR|CMB;
+				}
+				regexp_h_store(c, ep, endbuf);
+			}
+#endif /* REGEXP_H_WCHARS */
+		}
+	}
+}
+
+/*
+ * Search the line p1 for the compiled expression p2.
+ *
+ * Returns 1 with loc1 pointing at the start of the match (loc2 is
+ * set by regexp_h_advance()), or 0 when nothing matches.  An anchored
+ * expression (circf) is only tried at p1 itself.  Expressions that
+ * begin with a literal skip positions whose first byte(s) differ.
+ */
+int
+step(const char *p1, const char *p2)
+{
+	register int c;
+#ifdef REGEXP_H_WCHARS
+	register int d;
+#endif /* REGEXP_H_WCHARS */
+
+	REGEXP_H_STEP_INIT /* get circf */
+	regexp_h_bol = p1;
+#ifdef REGEXP_H_WCHARS
+	regexp_h_firstwc = NULL;
+#endif /* REGEXP_H_WCHARS */
+	if (circf) {
+		loc1 = (char *)p1;
+		return(regexp_h_advance(p1, p2));
+	}
+	/* fast check for first character */
+	if (*p2==CCHR) {
+		c = p2[1] & 0377;
+		do {
+			if ((*p1 & 0377) != c)
+				continue;
+			if (regexp_h_advance(p1, p2)) {
+				loc1 = (char *)p1;
+				return(1);
+			}
+		} while (*p1++);
+		return(0);
+	}
+#ifdef REGEXP_H_WCHARS
+	else if (*p2==CCH1) {
+		do {
+			if (p1[0] == p2[1] && regexp_h_advance(p1, p2)) {
+				loc1 = (char *)p1;
+				return(1);
+			}
+			c = regexp_h_fetch(p1, 1);
+		} while (c);
+		return(0);
+	} else if (*p2==CCH2) {
+		do {
+			if (p1[0] == p2[1] && p1[1] == p2[2] &&
+					regexp_h_advance(p1, p2)) {
+				loc1 = (char *)p1;
+				return(1);
+			}
+			c = regexp_h_fetch(p1, 1);
+		} while (c);
+		return(0);
+	} else if (*p2==CCH3) {
+		do {
+			if (p1[0] == p2[1] && p1[1] == p2[2] && p1[2] == p2[3]&&
+					regexp_h_advance(p1, p2)) {
+				loc1 = (char *)p1;
+				return(1);
+			}
+			c = regexp_h_fetch(p1, 1);
+		} while (c);
+		return(0);
+	} else if ((*p2&0377)==(CCHR|CMB)) {
+		/*
+		 * Generic multibyte literal.  p2 still points at the
+		 * opcode, so the literal is decoded from p2[1] through a
+		 * scratch pointer, and each candidate is decoded through
+		 * another one: regexp_h_advance() and loc1 need the
+		 * unmodified start of the expression and of the candidate.
+		 */
+		const char *pe = &p2[1];
+		const char *pc;
+
+		d = regexp_h_fetch(pe, 0);
+		do {
+			pc = p1;
+			c = regexp_h_fetch(pc, 1);
+			if (c == d && regexp_h_advance(p1, p2)) {
+				loc1 = (char *)p1;
+				return(1);
+			}
+			p1 = pc;	/* continue behind this character */
+		} while(c);
+		return(0);
+	}
+	/* regular algorithm */
+	if (regexp_h_wchars)
+		/* try every character position of the line */
+		do {
+			if (regexp_h_advance(p1, p2)) {
+				loc1 = (char *)p1;
+				return(1);
+			}
+			c = regexp_h_fetch(p1, 1);
+		} while (c);
+	else
+#endif /* REGEXP_H_WCHARS */
+		/* try every byte position, including the terminating NUL */
+		do {
+			if (regexp_h_advance(p1, p2)) {
+				loc1 = (char *)p1;
+				return(1);
+			}
+		} while (*p1++);
+	return(0);
+}
+
+#ifdef REGEXP_H_WCHARS
+/*
+ * It is painfully slow to read character-wise backwards in a
+ * multibyte string (see regexp_h_previous() above). For the star
+ * algorithm, we therefore keep track of every character as it is
+ * read in forward direction.
+ *
+ * Don't use alloca() for stack blocks since there is no measurable
+ * speedup and huge amounts of memory are used up for long input
+ * lines.
+ */
+#ifndef REGEXP_H_STAKBLOK
+#define REGEXP_H_STAKBLOK 1000
+#endif
+
+/*
+ * One block of the position stack: a doubly linked list node holding
+ * REGEXP_H_STAKBLOK saved input positions.
+ */
+struct regexp_h_stack {
+	struct regexp_h_stack *s_nxt;
+	struct regexp_h_stack *s_prv;
+	const char *s_ptr[REGEXP_H_STAKBLOK];
+};
+
+#define regexp_h_push(sb, sp, sc, lp) (regexp_h_wchars ?
\
+			regexp_h_pushwc(sb, sp, sc, lp) : (void)0)
+
+/*
+ * Save the input position lp on the position stack.
+ *
+ * *sb is the first block, *sp the block in use and *sc the next free
+ * slot in it.  Positions in front of the first multibyte character
+ * are not saved.  Blocks are allocated on demand; a block that is
+ * already linked behind *sp is reused.  If allocation fails the
+ * position is silently not recorded.
+ */
+static regexp_h_inline void
+regexp_h_pushwc(struct regexp_h_stack **sb,
+		struct regexp_h_stack **sp,
+		const char ***sc, const char *lp)
+{
+	if (regexp_h_firstwc == NULL || lp < regexp_h_firstwc)
+		return;
+	if (*sb == NULL) {
+		if ((*sb = regexp_h_malloc(sizeof **sb)) == NULL)
+			return;
+		(*sb)->s_nxt = (*sb)->s_prv = NULL;
+		*sp = *sb;
+		*sc = &(*sb)->s_ptr[0];
+	} else if (*sc >= &(*sp)->s_ptr[REGEXP_H_STAKBLOK]) {
+		if ((*sp)->s_nxt == NULL) {
+			struct regexp_h_stack *bq;
+
+			if ((bq = regexp_h_malloc(sizeof *bq)) == NULL)
+				return;
+			bq->s_nxt = NULL;
+			bq->s_prv = *sp;
+			(*sp)->s_nxt = bq;
+			*sp = bq;
+		} else
+			*sp = (*sp)->s_nxt;
+		*sc = &(*sp)->s_ptr[0];
+	}
+	*(*sc)++ = lp;
+}
+
+/*
+ * Return the input position that precedes lp.
+ *
+ * At or in front of the first multibyte character this is simply
+ * lp - 1.  Otherwise the most recently saved position is taken off
+ * the stack; an exhausted stack yields regexp_h_firstwc.  A block that
+ * has been emptied is released as soon as the previous block becomes
+ * current.
+ *
+ * The first block is never released here: this function has no access
+ * to the owner's base pointer, which regexp_h_zerostak() starts from,
+ * so freeing that block would leave the base pointer dangling.
+ * NOTE(review): assumes every user of the stack finishes with
+ * regexp_h_zerostak(); the call sites are not part of this excerpt.
+ */
+static regexp_h_inline const char *
+regexp_h_pop(struct regexp_h_stack **sp, const char ***sc,
+		const char *lp)
+{
+	if (regexp_h_firstwc == NULL || lp <= regexp_h_firstwc)
+		return &lp[-1];
+	if (*sp == NULL)
+		return regexp_h_firstwc;
+	if (*sc == &(*sp)->s_ptr[0]) {
+		if ((*sp)->s_prv == NULL)
+			return regexp_h_firstwc;
+		*sp = (*sp)->s_prv;
+		regexp_h_free((*sp)->s_nxt);
+		(*sp)->s_nxt = NULL ;
+		*sc = &(*sp)->s_ptr[REGEXP_H_STAKBLOK];
+	}
+	return *(--(*sc));
+}
+
+/*
+ * Release every block of the position stack, walking from the first
+ * block *sb along s_nxt, and reset both *sb and *sp to NULL.
+ */
+static void
+regexp_h_zerostak(struct regexp_h_stack **sb, struct regexp_h_stack **sp)
+{
+	/* each step frees the block behind the one just reached */
+	for (*sp = *sb; *sp && (*sp)->s_nxt; *sp = (*sp)->s_nxt)
+		if ((*sp)->s_prv)
+			regexp_h_free((*sp)->s_prv);
+	if (*sp) {
+		if ((*sp)->s_prv)
+			regexp_h_free((*sp)->s_prv);
+		regexp_h_free(*sp);
+	}
+	*sp = *sb = NULL;
+}
+#else /* !REGEXP_H_WCHARS */
+#define regexp_h_push(sb, sp, sc, lp)
+#endif /* !REGEXP_H_WCHARS */
+
+static int
+regexp_h_advance(const char *lp, const char *ep)
+{
+	register const char *curlp;
+	int c, least;
+#ifdef REGEXP_H_WCHARS
+	int d;
+	struct regexp_h_stack *sb = NULL, *sp = NULL;
+	const char **sc;
+#endif /* REGEXP_H_WCHARS */
+	char *bbeg;
+	int ct;
+
+	for (;;) switch (least = *ep++ & 0377, least
& ~REGEXP_H_LEAST) { + + case CCHR: +#ifdef REGEXP_H_WCHARS + case CCH1: +#endif + if (*ep++ == *lp++) + continue; + return(0); + +#ifdef REGEXP_H_WCHARS + case CCHR|CMB: + if (regexp_h_fetch(ep, 0) == regexp_h_fetch(lp, 1)) + continue; + return(0); + + case CCH2: + if (ep[0] == lp[0] && ep[1] == lp[1]) { + ep += 2, lp += 2; + continue; + } + return(0); + + case CCH3: + if (ep[0] == lp[0] && ep[1] == lp[1] && ep[2] == lp[2]) { + ep += 3, lp += 3; + continue; + } + return(0); +#endif /* REGEXP_H_WCHARS */ + + case CDOT: + if (*lp++) + continue; + return(0); +#ifdef REGEXP_H_WCHARS + case CDOT|CMB: + if ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF) + continue; + return(0); +#endif /* REGEXP_H_WCHARS */ + + case CDOL: + if (*lp==0) + continue; + return(0); + + case CCEOF: + loc2 = (char *)lp; + return(1); + + case CCL: + c = *lp++ & 0377; + if(ISTHERE(c)) { + ep += 32; + continue; + } + return(0); + +#ifdef REGEXP_H_WCHARS + case CCL|CMB: + case CNCL|CMB: + c = regexp_h_fetch(lp, 1); + if (regexp_h_cclass(ep, c, (ep[-1] & 0377) == (CCL|CMB))) { + ep += (*ep & 0377) + 17; + continue; + } + return 0; +#endif /* REGEXP_H_WCHARS */ + + case CBRA: + braslist[*ep++ & 0377] = (char *)lp; + continue; + + case CKET: + braelist[*ep++ & 0377] = (char *)lp; + continue; + + case CBRC: + if (lp == regexp_h_bol && locs == NULL) + continue; + if ((isdigit(lp[0] & 0377) || regexp_h_uletter(lp[0] & 0377)) + && !regexp_h_uletter(lp[-1] & 0377) + && !isdigit(lp[-1] & 0377)) + continue; + return(0); + +#ifdef REGEXP_H_WCHARS + case CBRC|CMB: + c = regexp_h_show(lp); + d = regexp_h_previous(lp); + if ((iswdigit(c) || regexp_h_wuletter(c)) + && !regexp_h_wuletter(d) + && !iswdigit(d)) + continue; + return(0); +#endif /* REGEXP_H_WCHARS */ + + case CLET: + if (!regexp_h_uletter(lp[0] & 0377) && !isdigit(lp[0] & 0377)) + continue; + return(0); + +#ifdef REGEXP_H_WCHARS + case CLET|CMB: + c = regexp_h_show(lp); + if (!regexp_h_wuletter(c) && !iswdigit(c)) + continue; + return(0); +#endif 
/* REGEXP_H_WCHARS */ + + case CCHR|RNGE: + c = *ep++; + regexp_h_getrnge(ep, least); + while(low--) + if(*lp++ != c) + return(0); + curlp = lp; + while(size--) { + regexp_h_push(&sb, &sp, &sc, lp); + if(*lp++ != c) + break; + } + if(size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + lp++; + } + ep += 2; + goto star; + +#ifdef REGEXP_H_WCHARS + case CCHR|RNGE|CMB: + case CCH1|RNGE: + case CCH2|RNGE: + case CCH3|RNGE: + c = regexp_h_fetch(ep, 0); + regexp_h_getrnge(ep, least); + while (low--) + if (regexp_h_fetch(lp, 1) != c) + return 0; + curlp = lp; + while (size--) { + regexp_h_push(&sb, &sp, &sc, lp); + if (regexp_h_fetch(lp, 1) != c) + break; + } + if(size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + regexp_h_fetch(lp, 1); + } + ep += 2; + goto star; +#endif /* REGEXP_H_WCHARS */ + + case CDOT|RNGE: + regexp_h_getrnge(ep, least); + while(low--) + if(*lp++ == '\0') + return(0); + curlp = lp; + while(size--) { + regexp_h_push(&sb, &sp, &sc, lp); + if(*lp++ == '\0') + break; + } + if(size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + lp++; + } + ep += 2; + goto star; + +#ifdef REGEXP_H_WCHARS + case CDOT|RNGE|CMB: + regexp_h_getrnge(ep, least); + while (low--) + if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF) + return 0; + curlp = lp; + while (size--) { + regexp_h_push(&sb, &sp, &sc, lp); + if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF) + break; + } + if (size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + regexp_h_fetch(lp, 1); + } + ep += 2; + goto star; +#endif /* REGEXP_H_WCHARS */ + + case CCL|RNGE: + regexp_h_getrnge(ep + 32, least); + while(low--) { + c = *lp++ & 0377; + if(!ISTHERE(c)) + return(0); + } + curlp = lp; + while(size--) { + regexp_h_push(&sb, &sp, &sc, lp); + c = *lp++ & 0377; + if(!ISTHERE(c)) + break; + } + if(size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + lp++; + } + ep += 34; /* 32 + 2 */ + goto star; + +#ifdef REGEXP_H_WCHARS + case CCL|RNGE|CMB: + case CNCL|RNGE|CMB: + regexp_h_getrnge(ep + (*ep & 0377) + 17, least); + while 
(low--) { + c = regexp_h_fetch(lp, 1); + if (!regexp_h_cclass(ep, c, + (ep[-1] & 0377 & ~REGEXP_H_LEAST) + == (CCL|RNGE|CMB))) + return 0; + } + curlp = lp; + while (size--) { + regexp_h_push(&sb, &sp, &sc, lp); + c = regexp_h_fetch(lp, 1); + if (!regexp_h_cclass(ep, c, + (ep[-1] & 0377 & ~REGEXP_H_LEAST) + == (CCL|RNGE|CMB))) + break; + } + if (size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + regexp_h_fetch(lp, 1); + } + ep += (*ep & 0377) + 19; + goto star; +#endif /* REGEXP_H_WCHARS */ + + case CBACK: + bbeg = braslist[*ep & 0377]; + ct = braelist[*ep++ & 0377] - bbeg; + + if(strncmp(bbeg, lp, ct) == 0) { + lp += ct; + continue; + } + return(0); + + case CBACK|STAR: + bbeg = braslist[*ep & 0377]; + ct = braelist[*ep++ & 0377] - bbeg; + curlp = lp; + while(strncmp(bbeg, lp, ct) == 0) + lp += ct; + + while(lp >= curlp) { + if(regexp_h_advance(lp, ep)) return(1); + lp -= ct; + } + return(0); + + + case CDOT|STAR: + curlp = lp; + do + regexp_h_push(&sb, &sp, &sc, lp); + while (*lp++); + goto star; + +#ifdef REGEXP_H_WCHARS + case CDOT|STAR|CMB: + curlp = lp; + do + regexp_h_push(&sb, &sp, &sc, lp); + while ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF); + goto star; +#endif /* REGEXP_H_WCHARS */ + + case CCHR|STAR: + curlp = lp; + do + regexp_h_push(&sb, &sp, &sc, lp); + while (*lp++ == *ep); + ep++; + goto star; + +#ifdef REGEXP_H_WCHARS + case CCHR|STAR|CMB: + case CCH1|STAR: + case CCH2|STAR: + case CCH3|STAR: + curlp = lp; + d = regexp_h_fetch(ep, 0); + do + regexp_h_push(&sb, &sp, &sc, lp); + while (regexp_h_fetch(lp, 1) == d); + goto star; +#endif /* REGEXP_H_WCHARS */ + + case CCL|STAR: + curlp = lp; + do { + regexp_h_push(&sb, &sp, &sc, lp); + c = *lp++ & 0377; + } while(ISTHERE(c)); + ep += 32; + goto star; + +#ifdef REGEXP_H_WCHARS + case CCL|STAR|CMB: + case CNCL|STAR|CMB: + curlp = lp; + do { + regexp_h_push(&sb, &sp, &sc, lp); + c = regexp_h_fetch(lp, 1); + } while (regexp_h_cclass(ep, c, (ep[-1] & 0377) + == (CCL|STAR|CMB))); + ep += (*ep & 0377) 
+ 17;	/* finishes "ep += (*ep & 0377)" from the previous line; presumably steps ep over the class operand that regexp_h_cclass() read -- confirm against the compile side */
+			goto star;
+#endif	/* REGEXP_H_WCHARS */
+/*
+ * star: shared backtracking tail of regexp_h_advance().  The STAR and RNGE
+ * cases jump here after setting curlp to the first position they may give
+ * back and moving lp forward over as much text as they could consume.
+ * lp is then walked back toward curlp, and at each position the remainder
+ * of the compiled expression (ep) is retried through a recursive call.
+ * The first position that succeeds makes the whole match succeed.
+ */
+	star:
+#ifdef	REGEXP_H_WCHARS
+		if (regexp_h_wchars == 0) {	/* single-byte text: positions are one byte apart */
+#endif
+			do {
+				if(--lp == locs)	/* back up one byte; stop on reaching locs (declared outside this view) */
+					break;
+				if (regexp_h_advance(lp, ep))	/* rest of the expression matches from here */
+					return(1);
+			} while (lp > curlp);	/* never give back more than this case consumed */
+#ifdef	REGEXP_H_WCHARS
+		} else {
+			do {
+				lp = regexp_h_pop(&sp, &sc, lp);	/* previous position recorded by regexp_h_push(); presumably a character boundary -- confirm in regexp_h_pop */
+				if (lp <= locs)	/* NOTE(review): the byte path tests "--lp == locs", this path "lp <= locs" */
+					break;
+				if (regexp_h_advance(lp, ep)) {
+					regexp_h_zerostak(&sb, &sp);	/* NOTE(review): presumably resets or releases the position stack before returning -- confirm */
+					return(1);
+				}
+			} while (lp > curlp);
+			regexp_h_zerostak(&sb, &sp);	/* same stack call on the failure path */
+		}
+#endif	/* REGEXP_H_WCHARS */
+		return(0);	/* no candidate position let the rest of the expression match */
+
+	}
+}
+/*
+ * regexp_h_getrnge: load the repetition bounds of a RNGE operand into the
+ * file-level variables low and size (both declared outside this view).
+ *
+ *	str	points at the operand; its first byte, masked to 0..255,
+ *		becomes low.
+ *	least	flag word; only the REGEXP_H_LEAST bit is examined here.
+ *
+ * With REGEXP_H_LEAST set, size becomes INT_MAX, i.e. effectively no upper
+ * limit (presumably an open-ended interval -- confirm where the flag is
+ * set).  Otherwise size is the second operand byte, masked to 0..255, minus
+ * low.  The RNGE cases of regexp_h_advance() then run "while (low--)" for
+ * the repetitions that must match and "while (size--)" for the additional
+ * ones that may match.  Nothing is returned; the result is left in the two
+ * variables.
+ */
+static void
+regexp_h_getrnge(register const char *str, int least)
+{
+	low = *str++ & 0377;	/* str now points at the second operand byte */
+	size = least & REGEXP_H_LEAST ? /*20000*/INT_MAX : (*str & 0377) - low;
+}
+/*
+ * advance: externally visible wrapper around regexp_h_advance().
+ *
+ *	lp	text position handed on to regexp_h_advance()
+ *	ep	compiled expression handed on to regexp_h_advance()
+ *
+ * Before delegating it expands REGEXP_H_ADVANCE_INIT (a macro defined
+ * outside this view), stores lp in regexp_h_bol and, when REGEXP_H_WCHARS
+ * is defined, resets regexp_h_firstwc to NULL.  The return value is
+ * whatever regexp_h_advance() returns; the paths visible in this file
+ * return 1 on a match and 0 otherwise.
+ */
+int
+advance(const char *lp, const char *ep)
+{
+	REGEXP_H_ADVANCE_INIT	/* skip past circf */
+	regexp_h_bol = lp;	/* presumably remembered as the beginning of the text -- confirm at its readers */
+#ifdef	REGEXP_H_WCHARS
+	regexp_h_firstwc = NULL;	/* NOTE(review): looks like a per-call cache reset -- confirm */
+#endif	/* REGEXP_H_WCHARS */
+	return regexp_h_advance(lp, ep);
+}