/* * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. * * Sccsid @(#)colldata.h 1.5 (gritter) 5/1/04 */ /* UNIX(R) Regular Expresssion Library * * Note: Code is released under the GNU LGPL * * Copyright (C) 2001 Caldera International, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to: * Free Software Foundation, Inc. * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef LIBUXRE_COLLDATA_H #define LIBUXRE_COLLDATA_H typedef struct { long coll_offst; /* offset to xnd table */ long sub_cnt; /* length of subnd table */ long sub_offst; /* offset to subnd table */ long str_offst; /* offset to strings for subnd table */ long flags; /* nonzero if reg.exp. used */ } hd; typedef struct { unsigned char ch; /* character or number of followers */ unsigned char pwt; /* primary weight */ unsigned char swt; /* secondary weight */ unsigned char ns; /* index of follower state list */ } xnd; typedef struct { char *exp; /* expression to be replaced */ long explen; /* length of expression */ char *repl; /* replacement string */ } subnd; /*----------------------------------*/ #include #include /* #include */ /* * Structure of a collation file: * 1. CollHead (maintbl is 0 if CHF_ENCODED) * if !CHF_ENCODED then * 2. CollElem[bytes] (256 for 8 bit bytes) * 3. if CHF_INDEXED then * CollElem[wides] (nmain-256 for 8 bit bytes) * else * CollMult[wides] * 4. CollMult[*] (none if multtbl is 0) * 5. wuchar_type[*] (none if repltbl is 0) * 6. CollSubn[*] (none if subntbl is 0) * 7. strings (first is pathname for .so if CHF_DYNAMIC) * * The actual location of parts 2 through 7 is not important. * * The main table is in encoded value order. * * All indeces/offsets must be nonzero to be effective; zero is reserved * to indicate no-such-entry. This implies either that an unused initial * entry is placed in each of (4) through (7), or that the "start offset" * given by the header is artificially pushed back by an entry size. * * Note that if CHF_ENCODED is not set, then nweight must be positive. * * If an element can begin a multiple character element, it contains a * nonzero multbeg which is the initial index into (4) for its list; * the list is terminated by a CollMult with a ch of zero. * * If there are elements with the same primary weight (weight[1]), then * for each such element, it must have a CollMult list. The CollMult * that terminates the list (ch==0) notes the lowest and highest basic * weights for those elements with that same primary weight value * respectively in weight[0] and weight[1]. If there are some basic * weights between these values that do not have the same primary * weight--are not in the equivalence class--then the terminator also * has a SUBN_SPECIAL mark. Note that this list terminator should be * shared when the elements are not multiple character collating * elements because they wouldn't otherwise have a CollMult list. * * WGHT_IGNORE is used to denote ignored collating elements for a * particular collation ordering pass. All main table entries other * than for '\0' will have a non-WGHT_IGNORE weight[0]. However, it is * possible for a CollMult entries from (4) to have a WGHT_IGNORE * weight[0]: If, for example, "xyz" is a multiple character collating * element, but "xy" is not, then the CollMult for "y" will have a * WGHT_IGNORE weight[0]. Also, WGHT_IGNORE is used to terminate each * list of replacement weights. * * Within (3), it is possible to describe a sequence of unremarkable * collating elements with a single CollMult entry. If the SUBN_SPECIAL * bit is set, the rest of subnbeg represents the number of collating * elements covered by this entry. The weight[0] values are determined * by adding the difference between the encoded value and the entry's ch * value to the entry's weight[0]. This value is then substituted for * any weight[n], n>0 that has only the WGHT_SPECIAL bit set. libuxre_collelem() * hides any match to such an entry by filling in a "spare" CollElem. * * If there are substitution strings, then for each character that begins * a string, it has a nonzero subnbeg which is similarly the initial * index into (6). The indeces in (6) refer to offsets within (7). */ #define TOPBIT(t) (((t)1) << (sizeof(t) * CHAR_BIT - 1)) #define CHF_ENCODED 0x1 /* collation by encoded values only */ #define CHF_INDEXED 0x2 /* main table indexed by encoded values */ #define CHF_MULTICH 0x4 /* a multiple char. coll. elem. exists */ #define CHF_DYNAMIC 0x8 /* shared object has collation functions */ #define CWF_BACKWARD 0x1 /* reversed ordering for this weight */ #define CWF_POSITION 0x2 /* weight takes position into account */ #define CLVERS 1 /* most recent version */ #define WGHT_IGNORE 0 /* ignore this collating element */ #define WGHT_SPECIAL TOPBIT(wuchar_type) #define SUBN_SPECIAL TOPBIT(unsigned short) #ifndef COLL_WEIGHTS_MAX #define COLL_WEIGHTS_MAX 1 #endif typedef struct { unsigned long maintbl; /* start of main table */ unsigned long multtbl; /* start of multi-char table */ unsigned long repltbl; /* start of replacement weights */ unsigned long subntbl; /* start of substitutions */ unsigned long strstbl; /* start of sub. strings */ unsigned long nmain; /* # entries in main table */ unsigned short flags; /* CHF_* bits */ unsigned short version; /* handle future changes */ unsigned char elemsize; /* # bytes/element (w/padding) */ unsigned char nweight; /* # weights/element */ unsigned char order[COLL_WEIGHTS_MAX]; /* CWF_* bits/weight */ } CollHead; typedef struct { unsigned short multbeg; /* start of multi-chars */ unsigned short subnbeg; /* start of substitutions */ wuchar_type weight[COLL_WEIGHTS_MAX]; } CollElem; typedef struct { wchar_t ch; /* "this" character (of sequence) */ CollElem elem; /* its full information */ } CollMult; typedef struct { unsigned short strbeg; /* start of match string */ unsigned short length; /* length of match string */ unsigned short repbeg; /* start of replacement */ } CollSubn; struct lc_collate { const unsigned char *strstbl; const wuchar_type *repltbl; const CollElem *maintbl; const CollMult *multtbl; const CollSubn *subntbl; #ifdef DSHLIB void *handle; void (*done)(struct lc_collate *); int (*strc)(struct lc_collate *, const char *, const char *); int (*wcsc)(struct lc_collate *, const wchar_t *, const wchar_t *); size_t (*strx)(struct lc_collate *, char *, const char *, size_t); size_t (*wcsx)(struct lc_collate *, wchar_t *, const wchar_t *, size_t); #endif const char *mapobj; size_t mapsize; unsigned long nmain; short nuse; unsigned short flags; unsigned char elemsize; unsigned char nweight; unsigned char order[COLL_WEIGHTS_MAX]; }; #define ELEM_BADCHAR ((CollElem *)0) #define ELEM_ENCODED ((CollElem *)-1) /* LIBUXRE_STATIC int libuxre_old_collate(struct lc_collate *); LIBUXRE_STATIC int libuxre_strqcoll(struct lc_collate *, const char *, const char *); LIBUXRE_STATIC int libuxre_wcsqcoll(struct lc_collate *, const wchar_t *, const wchar_t *); */ extern struct lc_collate *libuxre_lc_collate(struct lc_collate *); LIBUXRE_STATIC const CollElem *libuxre_collelem(struct lc_collate *, CollElem *, wchar_t); LIBUXRE_STATIC const CollElem *libuxre_collmult(struct lc_collate *, const CollElem *, wchar_t); /* LIBUXRE_STATIC const CollElem *libuxre_collmbs(struct lc_collate *, CollElem *, const unsigned char **); LIBUXRE_STATIC const CollElem *libuxre_collwcs(struct lc_collate *, CollElem *, const wchar_t **); */ #endif /* !LIBUXRE_COLLDATA_H */